from .common import *
import warnings

import mxnet as mx
import mxnet.ndarray as nd

def _round_ste(x):
    # Straight-through estimator: the forward pass produces round(x) while the
    # backward pass lets the gradient flow through as if the op were identity.
    return mx.nd.stop_gradient(mx.nd.round(x) - x) + x

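# A minimal sanity check of the straight-through estimator (illustrative only):
#
#   x = mx.nd.array([0.2, 1.7])
#   x.attach_grad()
#   with mx.autograd.record():
#       y = _round_ste(x)
#   y.backward(mx.nd.ones_like(y))
#   # y is [0., 2.] while x.grad is [1., 1.]: the gradient passes straight through.
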
def _new_detached_nd(*args):
    # Return detached copies of the given NDArrays so they can be re-used as
    # fresh autograd leaves inside backward().
    return [item.detach() for item in args]


class UniformAffineQuantizerWrapper(Wrapper):
    _scale_methods = ['max_scale', 'max', 'mse']

    def __init__(self, op, config):
        self.channel_wise = False
        # Fall back to the first known scale method when none is configured.
        self.scale_method = config.get('scale_method', self._scale_methods[0])
        super(UniformAffineQuantizerWrapper, self).__init__(op, config)
        self.delta_nd = None
        self.delta_op = None
        self.zero_point_nd = None
        self.zero_point_op = None

    def _build_attr_dict(self):
        assert self._config['q_op_name'] not in self._ori_op.attr('name')
        # Non-symbol attributes
        self._attr_dict['op_type'] = self._config['q_op_name']
        self._attr_dict['name'] = f"{self._attr_dict['op_type']}_{self._ori_op.attr('name')}"
        self._attr_dict['n_bits'] = self._config['n_bits']
        self.channel_wise = self._config['channel_wise']
        # Symbol attributes
        self._attr_dict['data'] = self._ori_op
        if not self.channel_wise:
            self.delta_op = mx.sym.Variable(f"{self._attr_dict['name']}_delta", shape=(1,))
            self.zero_point_op = mx.sym.Variable(f"{self._attr_dict['name']}_zero_point", shape=(1,))
        else:
            # Assume the first dim of the input data is the channel dim.
            assert len(self._ori_op.infer_shape()[1]) == 1
            ori_op_shape = self._ori_op.infer_shape()[1][0]
            channel_wise_shape = (ori_op_shape[0], *([1] * (len(ori_op_shape) - 1)))
            self.delta_op = mx.sym.Variable(
                f"{self._attr_dict['name']}_delta",
                shape=channel_wise_shape)
            self.zero_point_op = mx.sym.Variable(
                f"{self._attr_dict['name']}_zero_point",
                shape=channel_wise_shape)
        self._attr_dict['delta'] = self.delta_op
        self._attr_dict['zero_point'] = self.zero_point_op

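    # Note: with channel_wise=True, _build_attr_dict above gives, e.g., a conv
    # weight of shape (64, 3, 3, 3) per-channel delta/zero_point variables of
    # shape (64, 1, 1, 1), which broadcast against the weight inside the custom
    # op's forward pass (the 64x3x3x3 shape is only an illustrative assumption).
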
    def init_param(self, data: nd.NDArray):
        # Calibrate delta / zero_point from the given data and keep the results
        # on the wrapper so they can be bound to the corresponding symbols.
        delta, zero_point = self._init_param_impl(data, channel_wise=self.channel_wise)
        if not isinstance(delta, nd.NDArray):
            # The per-tensor path returns Python scalars; store them as (1,) NDArrays.
            delta = nd.array([delta], ctx=data.context)
            zero_point = nd.array([zero_point], ctx=data.context)
        self.delta_nd = delta
        self.zero_point_nd = zero_point

    def _init_param_impl(self, input_data: nd.NDArray, channel_wise: bool = False):
        delta, zero_point = None, None
        n_bits = self._attr_dict['n_bits']
        n_levels = 2 ** n_bits
        if channel_wise:
            x_clone = input_data.copy().detach()
            n_channels = x_clone.shape[0]
            # Per-channel absolute maximum over all non-channel axes.
            if len(x_clone.shape) == 4:
                x_max = x_clone.abs().max(axis=(1, 2, 3))
            else:
                x_max = x_clone.abs().max(axis=-1)
            delta = x_max.copy()
            zero_point = x_max.copy()
            # Determine the scale and zero point channel by channel.
            for c in range(n_channels):
                delta[c], zero_point[c] = self._init_param_impl(x_clone[c], channel_wise=False)
            # Reshape so delta / zero_point broadcast against the original data.
            if len(x_clone.shape) == 4:
                delta = delta.reshape((-1, 1, 1, 1))
                zero_point = zero_point.reshape((-1, 1, 1, 1))
            else:
                delta = delta.reshape((-1, 1))
                zero_point = zero_point.reshape((-1, 1))
        else:
            if 'max' in self.scale_method:
                x_min = min(input_data.min().asscalar(), 0)
                x_max = max(input_data.max().asscalar(), 0)
                if 'scale' in self.scale_method:
                    x_min = x_min * (n_bits + 2) / 8
                    x_max = x_max * (n_bits + 2) / 8

                x_absmax = max(abs(x_min), x_max)
                if self._config.get('sym', False):
                    # Symmetric quantization if requested in the config (assumed key 'sym').
                    x_min, x_max = -x_absmax if x_min < 0 else 0, x_absmax

                delta = float(x_max - x_min) / (n_levels - 1)
                if delta < 1e-8:
                    warnings.warn('Quantization range close to zero: [{}, {}]'.format(x_min, x_max))
                    delta = 1e-8

                # The per-tensor path returns Python scalars.
                zero_point = round(-x_min / delta)

            elif self.scale_method == 'mse':
                # We always use symmetric quantization in mse mode.
                x_absmax = input_data.abs().max().asscalar()
                x_min = input_data.min().asscalar()
                best_score = 1e10
                for i in range(80):
                    new_max = x_absmax * (1.0 - (i * 0.01))
                    # Fake-quantize with the candidate range [-new_max, new_max]
                    # (mirrors the forward pass of UniformAffineQuantizer below).
                    cand_delta = (2 * new_max) / (n_levels - 1)
                    cand_zero_point = round(new_max / cand_delta) if x_min < 0 else 0
                    x_q = (nd.clip(nd.round(input_data / cand_delta) + cand_zero_point,
                                   0, n_levels - 1) - cand_zero_point) * cand_delta
                    # L_p norm minimization as described in LAPQ (p=2.4),
                    # https://arxiv.org/abs/1911.07190
                    score = ((input_data - x_q).abs() ** 2.4).mean().asscalar()
                    if score < best_score:
                        best_score = score
                        delta = cand_delta
                        zero_point = cand_zero_point
            else:
                raise NotImplementedError
        return delta, zero_point


class UniformAffineQuantizer(mx.operator.CustomOp):
    def __init__(self, n_bits):
        super(UniformAffineQuantizer, self).__init__()
        self.n_bits = n_bits
        self.n_levels = 2 ** self.n_bits

    def forward(self, is_train, req, in_data, out_data, aux):
        conv_weight, delta, zero_point = in_data[0], in_data[1], in_data[2]
        # TODO: a non-zero zero_point is hard to realize in fully quantized inference.
        x_int = _round_ste(conv_weight / delta) + zero_point
        x_quant = mx.nd.clip(x_int, 0, self.n_levels - 1)
        x_dequant = (x_quant - zero_point) * delta
        self.assign(out_data[0], req[0], x_dequant)

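    # Worked example (illustrative numbers): with n_bits=2 (n_levels=4),
    # delta=0.5 and zero_point=2, a weight value of 0.7 maps to
    # x_int = round(0.7 / 0.5) + 2 = 3, stays inside the clip range [0, 3],
    # and dequantizes to (3 - 2) * 0.5 = 0.5.
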
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # Recompute the forward pass on detached copies under autograd and let
        # MXNet derive the gradients (similar to gradient checkpointing in PyTorch).
        conv_weight, delta, zero_point = _new_detached_nd(*in_data[:3])
        conv_weight.attach_grad()
        delta.attach_grad()
        zero_point.attach_grad()
        with mx.autograd.record():
            x_int = _round_ste(conv_weight / delta) + zero_point
            x_quant = mx.nd.clip(x_int, 0, self.n_levels - 1)
            x_dequant = (x_quant - zero_point) * delta
        x_dequant.backward(_new_detached_nd(out_grad[0])[0])

        self.assign(in_grad[0], req[0], conv_weight.grad)
        self.assign(in_grad[1], req[1], delta.grad)
        self.assign(in_grad[2], req[2], zero_point.grad)


@mx.operator.register(QUANT_OP_PREFIX + "UniformAffineQuantizer")
class UniformAffineQuantizerProp(mx.operator.CustomOpProp):
    def __init__(self, n_bits):
        super(UniformAffineQuantizerProp, self).__init__()
        # Custom op keyword arguments arrive as strings, so cast defensively.
        n_bits = int(n_bits)

        assert 2 <= n_bits <= 32, 'bitwidth not supported'
        self.n_bits = n_bits

    def list_arguments(self):
        return ['data', 'delta', 'zero_point']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # Expect data, delta, zero_point; the output shape matches the data shape.
        assert len(in_shape) == 3
        return [*in_shape], [in_shape[0]], []

    def infer_type(self, in_type):
        return [*in_type], [in_type[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return UniformAffineQuantizer(n_bits=self.n_bits)

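# A minimal usage sketch (hypothetical symbol names; assumes QUANT_OP_PREFIX is
# provided by .common as used above): bind the registered op on a weight symbol.
#
#   weight = mx.sym.Variable('conv0_weight')
#   delta = mx.sym.Variable('conv0_weight_delta', shape=(1,))
#   zero_point = mx.sym.Variable('conv0_weight_zero_point', shape=(1,))
#   q_weight = mx.sym.Custom(weight, delta, zero_point,
#                            op_type=QUANT_OP_PREFIX + 'UniformAffineQuantizer',
#                            n_bits=8)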