Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added MinMax algorithm for OpenVINO backend #1444

Merged
merged 33 commits into from
Jan 19, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rebase
  • Loading branch information
l-bat committed Jan 18, 2023
commit 41fa4c45548d21254d2f446e9b8a69dbeb3e1fa2
Original file line number Diff line number Diff line change
Expand Up @@ -48,65 +48,3 @@ def __lt__(self, other: 'OVTargetPoint') -> bool:
if self.__getattribute__(param) > other.__getattribute__(param):
return False
return False


class OVQuantizerLayerParameters:
    """
    Holds FakeQuantize op attributes: the input range derived from collected
    statistics and the output range derived from the quantizer configuration.
    """

    def __init__(self,
                 statistics: Union[MinMaxTensorStatistic, np.ndarray],
                 quantizer_config: QuantizerConfig):
        """
        :param statistics: Either a MinMaxTensorStatistic with pre-computed
            min/max values, or a raw np.ndarray to reduce min/max over.
        :param quantizer_config: Quantization settings (bit width, mode,
            per-channel flag) used to derive levels and the output range.
        """
        if isinstance(statistics, MinMaxTensorStatistic):
            self.input_low = np.array(statistics.min_values)
            self.input_high = np.array(statistics.max_values)
        else:
            # Per-channel: reduce over all axes except the leading one
            # (assumed to be the channel axis — TODO confirm against callers);
            # per-tensor: reduce over the whole tensor (axes=None).
            per_channel = quantizer_config.per_channel
            axes = tuple(range(len(statistics.shape))[1:]) if per_channel else None
            self.input_low = np.amin(statistics, axis=axes)
            self.input_high = np.amax(statistics, axis=axes)

        self.levels = 2 ** quantizer_config.num_bits
        if quantizer_config.mode == QuantizationMode.SYMMETRIC:
            # Symmetric signed range, e.g. [-128, 127] for 8 bits.
            self.output_low = np.full_like(self.input_low, fill_value=-self.levels / 2)
            self.output_high = np.full_like(self.input_high, fill_value=self.levels / 2 - 1)
        else:
            # Asymmetric unsigned range, e.g. [0, 255] for 8 bits.
            self.output_low = np.zeros_like(self.input_low)
            self.output_high = np.full_like(self.input_high, fill_value=self.levels - 1)

class OVInsertionCommand(TransformationCommand):
    """Base class for OpenVINO-backend insertion transformation commands."""

    def __init__(self, target_point: OVTargetPoint):
        super().__init__(TransformationType.INSERT, target_point)

    def union(self, other: 'TransformationCommand') -> 'TransformationCommand':
        # Merging commands is not implemented for this backend; for a reference
        # implementation see nncf/torch/graph/transformations/commands/PTInsertionCommand.
        raise NotImplementedError()


class OVOutputInsertionCommand(OVInsertionCommand):
    """Insertion command specialization; presumably adds model outputs (per its name)."""

    def union(self, other: 'TransformationCommand') -> 'TransformationCommand':
        # Merging commands is not implemented for this backend; for a reference
        # implementation see nncf/torch/graph/transformations/commands/PTInsertionCommand.
        raise NotImplementedError()


class OVFQNodeRemovingCommand(TransformationCommand):
    """
    Command that removes FakeQuantize nodes from the model.
    """

    def __init__(self, target_point: OVTargetPoint):
        """
        :param target_point: Target point identifying the layer whose
            FakeQuantize node should be removed.
        """
        super().__init__(TransformationType.REMOVE, target_point)


class OVQuantizerInsertionCommand(OVInsertionCommand):
    """Inserts a quantizer configured by the supplied FakeQuantize parameters."""

    def __init__(self, target_point: OVTargetPoint, quantizer_parameters: OVQuantizerLayerParameters):
        """
        :param target_point: Where the quantizer is inserted.
        :param quantizer_parameters: FakeQuantize attributes for the inserted op.
        """
        super().__init__(target_point)
        self.quantizer_parameters = quantizer_parameters

    def union(self, other: 'TransformationCommand') -> 'TransformationCommand':
        # Merging commands is not implemented for this backend; for a reference
        # implementation see nncf/torch/graph/transformations/commands/PTInsertionCommand.
        raise NotImplementedError()
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,20 @@
from nncf.common.graph.transformations.commands import TargetType
from nncf.common.hardware.config import HWConfig
from nncf.common.quantization.structs import QuantizerConfig
# from nncf.common.quantization.structs import QuantizationMode
from nncf.common.tensor_statistics.collectors import ReductionShape
from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic
from nncf.common.utils.backend import BackendType

from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import GENERAL_WEIGHT_LAYER_METATYPES
from nncf.experimental.openvino_native.graph.transformations.commands import OVQuantizerInsertionCommand
from nncf.experimental.openvino_native.graph.transformations.commands import OVTargetPoint
from nncf.experimental.openvino_native.graph.transformations.commands import OVQuantizerLayerParameters
from nncf.experimental.openvino_native.graph.model_transformer import OVModelTransformer

from nncf.experimental.openvino_native.hardware.config import OVHWConfig
from nncf.experimental.openvino_native.hardware.fused_patterns import OPENVINO_HW_FUSED_PATTERNS
from nncf.experimental.openvino_native.quantization.default_quantization import DEFAULT_OV_QUANT_TRAIT_TO_OP_DICT
from nncf.experimental.openvino_native.quantization.quantizer_parameters import calculate_activation_quantizer_parameters
from nncf.experimental.openvino_native.quantization.quantizer_parameters import calculate_weight_quantizer_parameters

from nncf.experimental.openvino_native.statistics.collectors import OVMeanMinMaxStatisticCollector
from nncf.experimental.openvino_native.statistics.collectors import OVMinMaxStatisticCollector
Expand Down Expand Up @@ -79,11 +79,20 @@ def target_point(target_type: TargetType,
return OVTargetPoint(target_type, target_node_name, port_id)

@staticmethod
def quantizer_insertion_command(target_point: OVTargetPoint,
quantizer_config: QuantizerConfig,
statistics: Union[MinMaxTensorStatistic, np.ndarray],
) -> OVQuantizerInsertionCommand:
parameters = OVQuantizerLayerParameters(statistics, quantizer_config)
def create_activation_quantizer_insertion_command(target_point: OVTargetPoint,
quantizer_config: QuantizerConfig,
statistics: MinMaxTensorStatistic) \
-> OVQuantizerInsertionCommand:
parameters = calculate_activation_quantizer_parameters(statistics, quantizer_config)
return OVQuantizerInsertionCommand(target_point, parameters)

@staticmethod
def create_weight_quantizer_insertion_command(target_point: OVTargetPoint,
                                              quantizer_config: QuantizerConfig,
                                              weight_tensor: np.ndarray,
                                              node: NNCFNode) -> OVQuantizerInsertionCommand:
    """
    Builds an insertion command for a weight quantizer.

    :param target_point: Where the quantizer is inserted.
    :param quantizer_config: Quantization settings for the weight.
    :param weight_tensor: Weight values used to compute the quantizer parameters.
    :param node: Graph node owning the weight; supplies the channel-axis metadata.
    :return: Command inserting the configured quantizer at the target point.
    """
    if quantizer_config.per_channel:
        channel_axis = node.metatype.weight_definitions.weight_channel_axis
    else:
        channel_axis = None
    parameters = calculate_weight_quantizer_parameters(weight_tensor, quantizer_config, channel_axis)
    return OVQuantizerInsertionCommand(target_point, parameters)

@staticmethod
Expand Down Expand Up @@ -120,29 +129,3 @@ def get_weight_tensor_port_id(model: ov.Model, node: NNCFNode) -> int:
@staticmethod
def get_weight_config(config: QuantizerConfig, model: ov.Model) -> QuantizerConfig:
    # The OpenVINO backend applies the requested weight quantizer config as-is;
    # no model-dependent adjustment is performed here (unlike backends that
    # restrict per-channel support based on the model, e.g. ONNX opset checks).
    return config


# class OVQuantizerLayerParameters:
# """
# Class handles FakeQuantize op attributes.
# """
# def __init__(self,
# statistics: Union[MinMaxTensorStatistic, np.ndarray],
# quantizer_config: QuantizerConfig):
# # initialize_quantizer_parameters(statistics, quantizer_config)
# if isinstance(statistics, MinMaxTensorStatistic):
# self.input_low = np.array(statistics.min_values)
# self.input_high = np.array(statistics.max_values)
# else:
# per_channel = quantizer_config.per_channel
# axes = tuple(range(len(statistics.shape))[1:]) if per_channel else None
# self.input_low = np.amin(statistics, axis=axes)
# self.input_high = np.amax(statistics, axis=axes)

# self.levels = 2 ** quantizer_config.num_bits
# if quantizer_config.mode == QuantizationMode.SYMMETRIC:
# self.output_low = np.full_like(self.input_low, fill_value=-self.levels / 2)
# self.output_high = np.full_like(self.input_high, fill_value=self.levels / 2 - 1)
# else:
# self.output_low = np.zeros_like(self.input_low)
# self.output_high = np.full_like(self.input_high, fill_value=self.levels - 1)
78 changes: 0 additions & 78 deletions nncf/quantization/algorithms/min_max/onnx_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@
from nncf.common.graph.graph import NNCFNode
from nncf.common.graph.operator_metatypes import OperatorMetatype
from nncf.common.graph.patterns import HWFusedPatterns
from nncf.common.graph.transformations.commands import TargetPoint
from nncf.common.graph.transformations.commands import TargetType
from nncf.common.hardware.config import HWConfig
from nncf.common.quantization.structs import QuantizerConfig
from nncf.common.quantization.structs import QuantizationMode
from nncf.common.tensor_statistics.collectors import ReductionShape
from nncf.common.tensor_statistics.statistics import MinMaxTensorStatistic
from nncf.common.utils.backend import BackendType
Expand Down Expand Up @@ -136,79 +134,3 @@ def get_weight_config(config: QuantizerConfig, model: onnx.ModelProto) -> Quanti
"will not use per-channel quantization because it is not supported in this opset.")

return config


class ONNXQuantizerLayerParameters:
    """
    Container for Quantizer/Dequantizer layer attributes.
    """

    def __init__(self, scale: List[float], zero_point: List[int], mode: QuantizationMode, axis: Optional[int]):
        """
        :param scale: Scale values (one per tensor, or one per channel).
        :param zero_point: Zero points matching the scales.
        :param mode: Quantization mode (symmetric or asymmetric).
        :param axis: Channel axis for per-channel quantization; None for per-tensor.
        """
        self.axis = axis
        self.mode = mode
        self.scale = scale
        self.zero_point = zero_point


def calculate_scale_level(max_val: Union[float, np.ndarray], min_val: Union[float, np.ndarray],
                          num_bits: int,
                          mode: QuantizationMode) -> Union[float, np.ndarray]:
    """
    Calculates the Quantizer/Dequantizer layer scale level.

    :param max_val: Upper bound(s) of the observed value range.
    :param min_val: Lower bound(s) of the observed value range.
    :param num_bits: Quantization bit width.
    :param mode: Symmetric or asymmetric quantization mode.
    :return: Scale value(s), scalar or array matching the input bounds.
    """
    levels = 2 ** num_bits
    if mode != QuantizationMode.SYMMETRIC:
        return (max_val - min_val) / levels
    # Symmetric mode scales by the largest absolute bound.
    abs_max = np.maximum(np.abs(max_val), np.abs(min_val))
    return abs_max / ((levels - 1) / 2)


def calculate_weight_quantizer_parameters(weight_tensor: np.ndarray, quantizer_config: QuantizerConfig,
                                          axis: Optional[int]) -> ONNXQuantizerLayerParameters:
    """
    Calculates Quantizer/Dequantizer layer attributes (scale, zero point,
    quantization mode) for a weight quantizer.

    :param weight_tensor: Weight tensor to derive the quantization range from.
    :param quantizer_config: Config of Quantizer (bit width, mode, per-channel flag).
    :param axis: In per-channel case - the axis for the quantization. In per-tensor - ignored.
    :return: Parameters of Quantizer.
    """
    mode = quantizer_config.mode
    num_bits = quantizer_config.num_bits

    reduce_axes = None
    if quantizer_config.per_channel:
        assert axis is not None
        # Reduce over every axis except the channel axis.
        reduce_axes = tuple(i for i in range(len(weight_tensor.shape)) if i != axis)
    input_high = np.amax(weight_tensor, axis=reduce_axes)
    input_low = np.amin(weight_tensor, axis=reduce_axes)
    scales = calculate_scale_level(input_high, input_low, num_bits, mode)
    zero_points = np.zeros_like(scales, dtype=np.int64)
    return ONNXQuantizerLayerParameters(scales.tolist(), zero_points.tolist(), mode, axis)


def calculate_activation_quantizer_parameters(statistics: MinMaxTensorStatistic,
                                              quantizer_config: QuantizerConfig,
                                              axis: Optional[int] = None) -> ONNXQuantizerLayerParameters:
    """
    Calculates Quantizer/Dequantizer layer attributes (scale, zero point,
    quantization mode) for an activation quantizer.

    :param statistics: Collected min/max statistics for the quantizer insertion point.
    :param quantizer_config: Config of the quantization configuration.
    :param axis: Axis of the quantization. None in a per-tensor quantization case.
    :return: Parameters of the quantizer/dequantizer layers.
    :raises RuntimeError: If per-channel quantization is requested (unsupported
        for activation tensors).
    """
    if quantizer_config.per_channel:
        # Raise the intended error directly. The original guarded this with an
        # `assert axis is not None` that was stripped under `python -O` and, when
        # it fired, masked this message with a bare AssertionError.
        raise RuntimeError('Currently per-channel is not supported for activation tensors.')
    num_bits = quantizer_config.num_bits
    mode = quantizer_config.mode
    input_low = statistics.min_values
    input_high = statistics.max_values
    scales = calculate_scale_level(input_high, input_low, num_bits, mode)
    zero_points = np.zeros_like(scales, dtype=np.int64)
    return ONNXQuantizerLayerParameters(scales.tolist(), zero_points.tolist(), mode, axis)
5 changes: 2 additions & 3 deletions tests/openvino/native/test_metatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import OVReshapeMetatype
from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import OVSubtractMetatype
from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import OVTransposeMetatype
from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import GENERAL_WEIGHT_LAYER_METATYPES

from nncf.experimental.openvino_native.graph.nncf_graph_builder import GraphConverter

Expand Down Expand Up @@ -54,8 +55,6 @@ def test_mapping_openvino_metatypes(model_creator_func, ref_metatypes):
assert Counter(ref_metatypes) == Counter(actual_metatypes)


from nncf.experimental.openvino_native.graph.metatypes.openvino_metatypes import GENERAL_WEIGHT_LAYER_METATYPES

REF_WEIGHTS_PORT_IDS = {
'Conv': 1,
'Conv_backprop': 1,
Expand All @@ -73,4 +72,4 @@ def test_determining_weights_port():
if 'weight_port_id' in node.layer_attributes:
counter += 1
assert node.layer_attributes.weight_port_id == REF_WEIGHTS_PORT_IDS[node.node_name]
assert counter == len(REF_METATYPES_COUNTERS)
assert counter == len(REF_METATYPES_COUNTERS)