This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Clean data #58

Merged
merged 149 commits into main from clean_data on Apr 4, 2023
Changes from 1 commit

149 commits
ed02951
Added feature and example of NAS (#516)
XinyuYe-Intel Jan 17, 2023
9812468
fix input_shape_recorder issue in SUT multi-ins (#527)
zhentaoyu Jan 30, 2023
e04b965
revision (#545)
XuhuiRen Jan 30, 2023
2ce0b0d
fix auto-distillation example failed (#550)
n1ck-guo Jan 30, 2023
e45ec64
Update pt version (#548)
VincyZhang Jan 31, 2023
cec31c0
Update docs (#544)
VincyZhang Jan 31, 2023
55b7cd5
Update pt version (#554)
VincyZhang Feb 1, 2023
12f77b6
add src1_perm in matmul python op (#537)
zhentaoyu Feb 2, 2023
224bcbf
Fix requirement (#556)
VincyZhang Feb 2, 2023
f039a9c
remove version in yaml (#557)
VincyZhang Feb 3, 2023
3b116dd
[Kernels] fix windows fail (#542)
airMeng Feb 3, 2023
6d11d72
turn on dynamic link by default (#539)
zhenwei-intel Feb 7, 2023
2a97052
[Kernels] Attention ref klocwork issues (#553)
airMeng Feb 7, 2023
9dafe11
add examples for gptj (#541)
XuhuiRen Feb 8, 2023
eef6487
improve to support C++ api document (#567)
NeoZhangJianyu Feb 8, 2023
28515ad
Added setfit notebook example. (#552)
XinyuYe-Intel Feb 9, 2023
6bbef46
Simplize readme (#564)
VincyZhang Feb 9, 2023
6cae222
Added a nas example. (#563)
XinyuYe-Intel Feb 10, 2023
ea86040
Udpate onnx version (#572)
VincyZhang Feb 10, 2023
de08b4b
update main page and example (#577)
VincyZhang Feb 12, 2023
cc5f325
Update jit_seq_cpy_2x8x8.hpp (#576)
NeoZhangJianyu Feb 12, 2023
95c6e4c
Enable lat_int8 (#565)
intellinjun Feb 13, 2023
70875d6
add docstring in tf_extractor and tf_utils (#569)
zhentaoyu Feb 13, 2023
a780c6f
docstring (#566)
Zhenzhong1 Feb 13, 2023
accfd87
update readme (#568)
zhenwei-intel Feb 13, 2023
401bd77
Refined example documents (#562)
XinyuYe-Intel Feb 13, 2023
5b8e386
add docstring to optimization (#573)
violetch24 Feb 13, 2023
e60b00e
Refactor TF quantization/pruning/distillation examples document (#571)
Spycsh Feb 14, 2023
3417a6a
Update readme (#581)
VincyZhang Feb 14, 2023
2440ca8
[Kernels] Trans MHA merge lnormalized spmm (#558)
zhewang1-intc Feb 15, 2023
6281d8d
sync external repo (#590)
VincyZhang Feb 15, 2023
084da57
Document fix (#591)
VincyZhang Feb 15, 2023
bfbb3c9
Add showcase bloom (#592)
VincyZhang Feb 15, 2023
c2f9ec6
[Kernels] visualize sparsity script (#454)
yuchengliu1 Feb 15, 2023
366e1ee
Enhance compile op registering (#584)
zhentaoyu Feb 15, 2023
181fbb3
Update distillation examples (#595)
VincyZhang Feb 16, 2023
4352992
add base and large bert example to pruner (#560)
n1ck-guo Feb 16, 2023
720d36d
[Engine]: add squeeze op and binary ops (#456)
zhenwei-intel Feb 17, 2023
478abd2
add docstring and update README (#579)
changwangss Feb 17, 2023
097f1c7
docstring (#599)
XuhuiRen Feb 17, 2023
a7671a0
[Kernels] fix improper-null-terminator and MHA cpplint (#594)
sunjiweiswift Feb 17, 2023
66cb3c9
[Neural Engine] Add the code to support tiny vit HF model (#561)
a32543254 Feb 17, 2023
047dd24
Fixed type error for PyTorch Pruning examples (#603)
PenghuiCheng Feb 17, 2023
f79b14d
revise md file for examples (#601)
XuhuiRen Feb 20, 2023
328455a
Changed to quantize SetFit model with INC (#606)
XinyuYe-Intel Feb 21, 2023
e144884
Wangwenqi add op (#596)
CeciliaWwq Feb 21, 2023
b8fbf9e
Zhenzhong/op attr (#604)
Zhenzhong1 Feb 21, 2023
3d39678
BinaryOP->BinaryOp frontend (#613)
Zhenzhong1 Feb 21, 2023
59491be
fix link color (#536)
NeoZhangJianyu Feb 21, 2023
aa5c714
fix of JIRA-391: windows build issue (#588)
luoyu-intel Feb 21, 2023
de05ee8
example README docs refine (#574)
violetch24 Feb 22, 2023
07b95bb
add gpt-neox example (#540)
violetch24 Feb 22, 2023
752942a
Update requiremend docs (#618)
VincyZhang Feb 23, 2023
82b3343
added multi-nodes QAT support for Question Answering and Text Classif…
XinyuYe-Intel Feb 23, 2023
50709bd
Pick back public repo (#622)
VincyZhang Feb 23, 2023
9eaf30a
Guoheng/fix bug 432 (#587)
n1ck-guo Feb 24, 2023
71441fb
[Kernels] bugfix benchmark spmm (#611)
sunjiweiswift Feb 25, 2023
8499771
Remove redundant code (#616)
VincyZhang Feb 27, 2023
bc81ecb
add image classification example (#225)
lkk12014402 Feb 27, 2023
882ee35
[Kernels] Refine headers for library compatibility and documents (#605)
airMeng Feb 28, 2023
41c4281
[Kernels] Reference impl and UT for Dense MHA with dynamic quantizati…
yi1ding Feb 28, 2023
a01bb90
update main page (#651)
VincyZhang Mar 1, 2023
1bf6741
fix klocwork issues (#649)
zhenwei-intel Mar 1, 2023
084ae49
Fix sparse bert mini example (#647)
a32543254 Mar 2, 2023
df6e369
fix for pruning import (#653)
violetch24 Mar 2, 2023
14e6a34
update README (#655)
violetch24 Mar 2, 2023
24f247c
Support gather with pytorch interface (#607)
yuchengliu1 Mar 2, 2023
d16e2ab
remove onnxruntime-extension (#660)
VincyZhang Mar 3, 2023
5c5a5ac
add longformer pruning codes (#585)
lkk12014402 Mar 4, 2023
827dcaa
[Kernels] fix translnorm benchmark fail (#643)
zhewang1-intc Mar 6, 2023
34d8b90
Opennmt fp32 (#598)
zhentaoyu Mar 6, 2023
0da5eb0
update inc build from source (#671)
VincyZhang Mar 6, 2023
7c3da8b
[Kernels] Static Q10N MHA support for GPT-J (#657)
yi1ding Mar 7, 2023
154aabb
Add the DLSA E2E solution to the ITREX (#632)
LifengWang Mar 7, 2023
7c43cd4
[Kernels] fix kernels format (#673)
airMeng Mar 8, 2023
2325063
fix empty_ops (#676)
zhentaoyu Mar 9, 2023
a4aa7f0
remove invalid code (#677)
PenghuiCheng Mar 9, 2023
b5c54de
fix for int8 flag (#684)
violetch24 Mar 10, 2023
5a60fc9
[Kernels] kernel code generator for gpu (#610)
VincyZhang Mar 10, 2023
f2390e3
stable diffusion enabling, including text encoder / vae decoder / une…
Zhenzhong1 Mar 10, 2023
da4d9cd
update pytorch pruner to v2.0 (#624)
n1ck-guo Mar 10, 2023
edc9090
Support smooth quantization and enable bloom model example (#675)
PenghuiCheng Mar 13, 2023
46fa399
Dynamic quantization in executor (#593)
yuchengliu1 Mar 14, 2023
a089147
fix pytest (#699)
yuchengliu1 Mar 14, 2023
bc38e86
parse torchscript model and build new graph (#687)
zhenwei-intel Mar 14, 2023
e135aa0
design a new benchmark API (#656)
xin3he Mar 15, 2023
35aba4e
logsoftmax modified solved conflicts (#682)
CeciliaWwq Mar 15, 2023
6e38b5d
avoid aggr init list & some windows warnings (#697)
yi1ding Mar 15, 2023
a10232d
fix bf16 (#701)
a32543254 Mar 15, 2023
edc855c
removed unspport recipe (#692)
PenghuiCheng Mar 15, 2023
8805289
add longformer (#669)
violetch24 Mar 15, 2023
684b6c6
fix for new benchmark (#706)
violetch24 Mar 15, 2023
2d0fec0
return torch model instead of inc model (#695)
xin3he Mar 15, 2023
56cf71a
stable diffusion bf16 enabling and example initialize (#691)
Zhenzhong1 Mar 16, 2023
6a2f259
[Kernels] Dynamic quant matmul for stable diffusion (#686)
zhewang1-intc Mar 16, 2023
acb693c
add devcatalog (#666)
VincyZhang Mar 16, 2023
eb21d2e
ut optimize (#731)
Zhenzhong1 Mar 20, 2023
60b0a00
[Engine]: Support int8 torch model per-tensor and per-channel (#703)
zhenwei-intel Mar 21, 2023
a8b9e8b
fix for new benchmark API (#729)
violetch24 Mar 21, 2023
525490f
recover tf examples (#723)
Spycsh Mar 21, 2023
5d61e9a
close yaml and bin file (#716)
zhentaoyu Mar 21, 2023
b49c161
[Kernels] dynamic quant mop up (#715)
zhewang1-intc Mar 21, 2023
5387bc0
readd engine related ut (#483)
zhentaoyu Mar 22, 2023
547323d
[Engine]fix lat (#704)
a32543254 Mar 22, 2023
1067b41
fix quant node and pattern order (#734)
zhenwei-intel Mar 22, 2023
c593438
add example for text generation (#664)
XuhuiRen Mar 22, 2023
44570e6
Added a textual inversion distillation for quantization example. (#586)
XinyuYe-Intel Mar 23, 2023
eaa14ce
Stable diffusion example optimize (#741)
Zhenzhong1 Mar 24, 2023
828e5d2
Stable Diffusion README and UT optimize. (#747)
Zhenzhong1 Mar 24, 2023
1ce5cf7
add flan-t5 for summarization (#733)
changwangss Mar 24, 2023
9e818cc
Bert dq examples (#742)
yuchengliu1 Mar 26, 2023
fbb4a6c
skip weight sharing ut (#751)
VincyZhang Mar 27, 2023
4640a05
fix compile fail (#752)
zhewang1-intc Mar 27, 2023
6b9c40d
Refine examples (#690)
VincyZhang Mar 27, 2023
178e85e
Fix example readme (#757)
VincyZhang Mar 28, 2023
9dbf282
add save_model API (#735)
xin3he Mar 28, 2023
93128ce
fix doc typo (#759)
zhentaoyu Mar 28, 2023
f1b41de
Patterns for GPT-J (#743)
zhenwei-intel Mar 28, 2023
e107817
Improve online document with source link (#753)
NeoZhangJianyu Mar 28, 2023
8eca747
Enhancement document of data augmentation (#661)
PenghuiCheng Mar 28, 2023
da67747
fix windows UT (#761)
yuchengliu1 Mar 28, 2023
02ed5b7
klockworks issues (#756)
airMeng Mar 29, 2023
a527766
Support smooth quant args with 'auto' and impove the docstring for co…
PenghuiCheng Mar 29, 2023
3645965
Fixed benchmark error since neural_compressor changed API name (#770)
PenghuiCheng Mar 30, 2023
ac2cbdd
[GPT-J] cherry-pick patterns and ops (#760)
zhenwei-intel Mar 30, 2023
ead0a15
build32 klocwork
airMeng Mar 31, 2023
222bfb8
leave TODO for dynamic_quant_matmul_ref
airMeng Mar 31, 2023
10e5a39
Revert "build32 klocwork"
airMeng Mar 31, 2023
ae80fb6
Revert "leave TODO for dynamic_quant_matmul_ref"
airMeng Mar 31, 2023
1cfdebf
fix bug with Escape characters issues by shlex quote (#766)
CeciliaWwq Mar 31, 2023
968064f
add gpt int8 test (#782)
zhenwei-intel Mar 31, 2023
522c0ec
Use DLOG to improve release efficiency (#781)
sunjiweiswift Mar 31, 2023
8439e5a
update example (#778)
yuchengliu1 Apr 1, 2023
de30df4
fix release (#787)
a32543254 Apr 1, 2023
6c7c6f6
fix windows pytest (#790)
a32543254 Apr 3, 2023
4b2e666
fix engine integration doc (#795)
zhentaoyu Apr 3, 2023
1fe646b
fix the shlex.quote issue (#794)
Zhenzhong1 Apr 3, 2023
20c75db
Fixed typo for smooth_quant example (#792)
PenghuiCheng Apr 3, 2023
96c1b95
add the vit example (#797)
a32543254 Apr 3, 2023
dabf27f
Added example for finetuning chatbot. (#763)
XinyuYe-Intel Apr 3, 2023
cb355f0
add workaround (#800)
Spycsh Apr 4, 2023
5ad2ca7
update example requiremnet (#789)
VincyZhang Apr 4, 2023
47e33b6
Build 31 klockwork (#777)
airMeng Apr 4, 2023
1c4eee4
fix pytorch examples (#796)
violetch24 Apr 4, 2023
9676fb9
Remove LLaMA for legal issue. (#803)
XinyuYe-Intel Apr 4, 2023
64ab07b
update version to 1.0 (#805)
VincyZhang Apr 4, 2023
b0e6088
Data cleaning for Intel domain dataset (#807)
XuhuiRen Apr 4, 2023
035e4a9
update readme
VincyZhang Apr 4, 2023
32c3d2e
Merge branch 'main' into clean_data
VincyZhang Apr 4, 2023
[Engine]fix lat (#704)
a32543254 authored Mar 22, 2023
commit 547323d07b0516bcdb17631313c29f811ac0b55a
@@ -92,21 +92,48 @@ def __call__(self, model):
},
'returns': [13, 14]
},
+            # minilmv2-lat-roberta int8
+            {
+                'patterns': {
+                    'in': [[(0, 'Shape'), (1, 'Gather'), (2, 'Unsqueeze'), (5, 'Concat'),
+                            (6, 'Reshape'), (7, 'Equal'), (8, 'Where'), (11, 'Expand')],
+                           [(0, 'Shape'), (3, 'Gather'), (4, 'Unsqueeze'), (5, 'Concat')],
+                           [(), (9, 'Unsqueeze'), (10, 'Unsqueeze'), (11, 'Expand')]],
+                    'out': [[(0, 'ExpandIndices')]]
+                },
+                'search_mode': 'op_type',
+                'node_names': {
+                    0: 11,
+                },
+                'input_tensors': {
+                    0: [[{
+                        9: [0]
+                    }, {
+                        0: [0]
+                    }], [[0, 1], 2]],
+                },
+                'output_tensors': {
+                    0: [[{
+                        11: [0]
+                    }], [[0], 1]],
+                },
+                'returns': [9, 10]
+            },
]
}

# minilmv2-lat-roberta
for idx, pattern_dict in enumerate(pattern_mapping_config['AttentionMaskLengthAdaptiveExpandIndices']):
-            model, new_node_names, ret_old_nodes = \
+            model, new_node_names, ret_old_nodes = \
util.pattern_mapping('AttentionMaskLengthAdaptiveExpandIndices', pattern_dict, model)
-            if len(new_node_names) != 0:
-                for i in range(len(new_node_names)):
-                    attr = OrderedDict()
-                    input_indices = []
-                    for unsqueeze_node in ret_old_nodes[i]:
-                        input_indices.append(int(unsqueeze_node.attr['axis']))
-                    attr['position'] = util.list2str(input_indices)
-                    keep_indices_node_idx = model.get_node_id(new_node_names[i][0])
-                    model.nodes[keep_indices_node_idx].attr = attr
+            if len(new_node_names) != 0:
+                for i in range(len(new_node_names)):
+                    attr = OrderedDict()
+                    input_indices = []
+                    for unsqueeze_node in ret_old_nodes[i]:
+                        input_indices.append(int(unsqueeze_node.attr['axes']))
+                    attr['position'] = util.list2str(input_indices)
+                    keep_indices_node_idx = model.get_node_id(new_node_names[i][0])
+                    model.nodes[keep_indices_node_idx].attr = attr

return model
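For readers unfamiliar with the engine's pattern-mapping notation, here is a minimal sketch condensing the loop above into a helper. The name `fuse_expand_indices` is hypothetical; `util.pattern_mapping`, `util.list2str`, and the model accessors are assumed to behave exactly as in the diff: each match replaces the matched Shape→…→Expand subgraph with one ExpandIndices node whose `position` attribute is taken from the matched Unsqueeze nodes' ONNX `axes` attributes.

```python
from collections import OrderedDict

def fuse_expand_indices(model, util, pattern_dict):
    # Replace every occurrence of the 'in' subgraph with the 'out' op.
    model, new_node_names, ret_old_nodes = util.pattern_mapping(
        'AttentionMaskLengthAdaptiveExpandIndices', pattern_dict, model)
    for i in range(len(new_node_names)):
        # ret_old_nodes[i] holds the nodes listed in 'returns' (Unsqueeze ops);
        # their 'axes' attributes become the fused op's 'position' attribute.
        position = [int(n.attr['axes']) for n in ret_old_nodes[i]]
        attr = OrderedDict({'position': util.list2str(position)})
        model.nodes[model.get_node_id(new_node_names[i][0])].attr = attr
    return model
```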
@@ -159,6 +159,33 @@ def __call__(self, model):
},
'returns': [5,6]
},

+            # int8 lat
+            {
+                'patterns': {
+                    'in': [[(0, 'Shape'), (1, 'Gather'), (2, 'Unsqueeze'), (3, 'Concat'),
+                            (4, 'Reshape'), (5, 'Equal'), (6, 'Where'), (8, 'Expand')],
+                           [(), (7, 'Unsqueeze'), (8, 'Expand')]],
+                    'out': [[(0, 'ExpandIndices')]]
+                },
+                'search_mode': 'op_type',
+                'node_names': {
+                    0: 7,
+                },
+                'input_tensors': {
+                    0: [[{
+                        7: [0]
+                    }, {
+                        0: [0]
+                    }], [[0, 1], 2]],
+                },
+                'output_tensors': {
+                    0: [[{
+                        8: [0]
+                    }], [[0], 1]],
+                },
+                'returns': [7]
+            },
]
}

@@ -179,7 +206,7 @@ def __call__(self, model):
axis_gather = []
for ret_old_node in ret_old_nodes[i]:
if ret_old_node.op_type == 'Unsqueeze':
-                    input_indices.append(int(ret_old_node.attr['axis']))
+                    input_indices.append(int(ret_old_node.attr['axes']))
elif ret_old_node.op_type == 'GatherElements':
axis_gather.append(int(ret_old_node.attr['axis']))
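The recurring one-line change in this commit (here and in the other pattern files) swaps `attr['axis']` for `attr['axes']` when reading Unsqueeze nodes, matching the ONNX attribute name (`axes` in opsets before 13); note that the GatherElements branch above correctly keeps `axis`, which is that op's ONNX attribute name. A hypothetical defensive accessor tolerating either spelling might look like this sketch:

```python
def unsqueeze_axes(node):
    """Return the unsqueeze axis as an int, preferring the ONNX 'axes' key."""
    for key in ('axes', 'axis'):
        if key in node.attr:
            return int(node.attr[key])
    raise KeyError("node has neither 'axes' nor 'axis' attribute")
```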

@@ -57,8 +57,34 @@ def __call__(self, model):
}], [[0], 1]],
},
'returns': [11]
-            }]
+            },
+            {
+                'patterns': {
+                    'in': [[(0, "Shape"), (1, 'Gather'), (2, "Unsqueeze"), (3, "Concat"),
+                            (7, "Tile")],
+                           [(0, "Shape"), (4, 'Gather'), (5, 'Range'),
+                            (6, "Unsqueeze"), (7, "Tile")]],
+                    'out': [[(0, 'Range')]]
+                },
+                'search_mode': 'op_type',
+                'node_names': {
+                    0: 5
+                },
+                'input_tensors': {
+                    0: [[{
+                        'input_data': [0]
+                    }], [[0], 1]],
+                },
+                'output_tensors': {
+                    0: [[{
+                        7: [0]
+                    }], [[0], 1]],
+                },
+                'returns': [5, 0]
+            }
+        ]
        }
+        collect_node = []

for i in range(len(pattern_mapping_config['GenerateSequence'])):
pattern_dict = pattern_mapping_config['GenerateSequence'][i]
@@ -72,6 +98,10 @@ def __call__(self, model):
attr["step"] = int(old_node.input_tensors[2].data)
new_node_idx = model.get_node_id(new_node_names[j][0])
model.nodes[new_node_idx].attr = attr

+                    if i == 1:
+                        collect_node.append(ret_old_nodes[j][1])
+        model.insert_nodes(10, collect_node)
+        return model

return model
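The new `collect_node` logic keeps the second returned node of each match (for this pattern, the original Shape op, per `'returns': [5, 0]`) and splices it back into the graph. A minimal sketch of that idiom, with a hypothetical helper name; `model.insert_nodes` is assumed to take a position and a node list as above, and position 10 is model-specific, copied from the diff rather than a general rule:

```python
def recover_returned_nodes(model, ret_old_nodes, keep_index=1, insert_at=10):
    # Collect the kept node from each pattern match, then splice them
    # back into the graph at a fixed position.
    collected = [old_nodes[keep_index] for old_nodes in ret_old_nodes]
    if collected:
        model.insert_nodes(insert_at, collected)
    return model
```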
@@ -57,12 +57,22 @@ def search_quant_fusion(node):
quant_node, can_fuse = search_quant_fusion(node)
if can_fuse:
if dtype == 'u8' or dtype == 's8':

if quant_node.op_type == "Softmax":
-                        model.change_node_input_tensors(quant_node.name, 1, node.input_tensors[1],
-                                                        'insert')
-                        model.change_node_input_tensors(quant_node.name, 2, node.input_tensors[2],
-                                                        'insert')
-                        quant_node.attr['output_dtype'] = "u8"
+                        def is_lat_model(model, p=None):
+                            if p == None:
+                                p = [[(0, 'TopK'),(1, 'GatherElements')]]
+                            match_result = util.search_pattern(p, model)
+                            return len(match_result) != 0
+                        if is_lat_model(model):
+                            node.attr = OrderedDict({'output_dtype': "u8"})
+                            continue
+                        else:
+                            model.change_node_input_tensors(quant_node.name, 1, node.input_tensors[1],
+                                                            'insert')
+                            model.change_node_input_tensors(quant_node.name, 2, node.input_tensors[2],
+                                                            'insert')
+                            quant_node.attr['output_dtype'] = "u8"
else:
model.change_node_input_tensors(quant_node.name, -2, node.input_tensors[1],
'modify')
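The inline `is_lat_model` check chooses between two Softmax quantization paths: graphs recognized by a TopK→GatherElements chain (LAT, presumably length-adaptive transformer, matching the "minilmv2-lat-roberta" patterns elsewhere in this commit) get `output_dtype = "u8"` set directly, while other graphs get extra input tensors (apparently the quantize op's min/max) inserted. A standalone sketch of the detection, with the pattern notation and `util.search_pattern` assumed to behave as in the diff:

```python
def is_lat_model(model, util):
    # A LAT graph prunes tokens with TopK and restores them with
    # GatherElements, so this two-op chain serves as a fingerprint.
    pattern = [[(0, 'TopK'), (1, 'GatherElements')]]
    return len(util.search_pattern(pattern, model)) != 0
```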
@@ -84,6 +84,10 @@ def _set_attr(se_attr, mat_attr, hidden_size, node_names, model):
hidden_size = int(ret_old_nodes[i][1].input_tensors[1].shape[0])
se_attr = ret_old_nodes[i][0].attr
mat_attr = ret_old_nodes[i][1].attr
+            mat_node = model.get_node_by_name(new_node_names[i][2])
+            reshape_node = model.get_node_by_name(new_node_names[i][1])
+            mat_node.input_tensors[0].name = ret_old_nodes[i][1].input_tensors[0].name
+            reshape_node.output_tensors[0].name = mat_node.input_tensors[0].name
_set_attr(se_attr, mat_attr, hidden_size, new_node_names[i], model)

return model
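The four added lines rewire tensor names so the new Reshape output feeds the new Matmul input under the original tensor's name. A commented sketch of the same wiring; the helper name is hypothetical and the node-index choices `[i][1]`/`[i][2]` are copied from the diff:

```python
def wire_reshape_into_matmul(model, new_node_names, ret_old_nodes, i):
    mat_node = model.get_node_by_name(new_node_names[i][2])
    reshape_node = model.get_node_by_name(new_node_names[i][1])
    # Reuse the original matmul's first input-tensor name ...
    mat_node.input_tensors[0].name = ret_old_nodes[i][1].input_tensors[0].name
    # ... and point the reshape output at that same name, so the
    # two new nodes connect through a single shared tensor.
    reshape_node.output_tensors[0].name = mat_node.input_tensors[0].name
```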
@@ -76,6 +76,12 @@ def _set_attr(ln_attr, se_attr, hidden_size, node_names, model):
model.nodes[scatter_elements_node_idx].attr = se_attr

# minilmv2-lat-roberta
+        layer_norm_idx = []
+        remove_list = []
+        pattern = pattern_mapping_config['ReshapeBeforeRestoreHiddenStates'][0]['patterns']['in']
+        patterns_nodes_name = util.search_pattern(pattern, model)
+        for pattern_nodes_name in patterns_nodes_name:
+            layer_norm_idx.append(model.get_node_id(pattern_nodes_name[0]))
pattern_dict = pattern_mapping_config['ReshapeBeforeRestoreHiddenStates'][0]
model, new_node_names, ret_old_nodes = util.pattern_mapping(
'ReshapeBeforeRestoreHiddenStates', pattern_dict, model)
@@ -85,7 +91,15 @@ def _set_attr(ln_attr, se_attr, hidden_size, node_names, model):
ln_attr = ret_old_nodes[i][0].attr
se_attr = ret_old_nodes[i][1].attr
_set_attr(ln_attr, se_attr, hidden_size, new_node_names[i], model)
+                import copy
+                ln_node = copy.deepcopy(model.get_node_by_name(new_node_names[i][0]))
+                model.remove_nodes([new_node_names[i][0]])
+                model.insert_nodes(layer_norm_idx[i] + i, [ln_node])
+
+                remove_list.append(new_node_names[i][0])
+
+
+        # model.remove_nodes(remove_list)
+        return model

return model
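The added block records where each matched pattern started (`layer_norm_idx`, captured before `pattern_mapping` rewrites the graph), then moves the fused node back to that position. A sketch of the relocation step with a hypothetical helper name; the `+ i` offset presumably compensates for the nodes re-inserted on earlier loop iterations, which shift later recorded indices by one each:

```python
import copy

def relocate_node(model, name, recorded_idx, iteration):
    # Copy, remove, then re-insert the node at its originally recorded
    # position, adjusted for insertions made by earlier iterations.
    node_copy = copy.deepcopy(model.get_node_by_name(name))
    model.remove_nodes([name])
    model.insert_nodes(recorded_idx + iteration, [node_copy])
```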
@@ -76,6 +76,44 @@ def __call__(self, model):
},
'returns': [10, 12]
},
+            {
+                'patterns': {
+                    'in': [[(0, 'Shape'), (1, 'Gather'), (2, 'Unsqueeze'), (3, 'Concat'),
+                            (4, 'Reshape'), (5, 'Equal'), (6, 'Where'), (8, 'Expand'),
+                            (9, 'ScatterElements')],
+                           [(), (7, 'Unsqueeze'), (8, 'Expand')]],
+                    'out': [[(0, 'Reshape'), (1, 'ExpandIndices'), (2, 'ScatterElements')]]
+                },
+                'search_mode': 'op_type',
+                'node_names': {
+                    0: 'reshape_to_3d_before_restoration',
+                    1: 8,
+                    2: 9,
+                },
+                'input_tensors': {
+                    0: [[{
+                        0: [0]
+                    }, {
+                        'input_data': [0]
+                    }], [[0, 1], 2]],
+                    1: [[{
+                        7: [0]
+                    }], [[0], 2]],
+                    2: [[{
+                        9: [0],
+                    }, {
+                        9: [2]
+                    }], [[0, 2], 3]],
+                },
+                'output_tensors': {
+                    0: [[], [[], 1]],
+                    1: [[], [[], 1]],
+                    2: [[{
+                        9: [0]
+                    }], [[0], 1]],
+                },
+                'returns': [7, 9]
+            },
]
}

@@ -94,29 +132,29 @@ def _set_attr(input_indices, se_attr, node_names, model):
model.nodes[se_node_idx].attr = se_attr

# minilmv2-lat-roberta
-        pattern_dict = pattern_mapping_config['RestoreHiddenStatesInLengthAdaptiveUpdateIndices'][
-            0]
-        model, new_node_names, ret_old_nodes = util.pattern_mapping(
-            'RestoreHiddenStatesInLengthAdaptiveUpdateIndices', pattern_dict, model)
-        if len(new_node_names) != 0:
-            for i in range(len(new_node_names)):
-                attr = OrderedDict()
-                input_indices = []
-                unsqueeze_node = ret_old_nodes[i][0]
-                input_indices.append(int(unsqueeze_node.attr['axis']))
-                se_attr = ret_old_nodes[i][1].attr
-                _set_attr(input_indices, se_attr, new_node_names[i], model)
-                # the first scatter elements operation need the output of embedding layer norm
-                # but its output shape is [bsxseq_len, hidden_size]
-                # so the first scatter node need modify this tensor to 3d tensor
-                # whose shape is [bs, seq_len, hidden_size]
-                reshape_3d_node = model.get_node_by_name(new_node_names[i][0])
-                embedding_ln_out_tensor = copy.deepcopy(reshape_3d_node.output_tensors[0])
-                scatter_node = model.get_node_by_name(new_node_names[i][2])
-                # check if one input tensor is from embedding_layer_norm node
-                if scatter_node.input_tensors[0].name == reshape_3d_node.input_tensors[0].name:
-                    model.change_node_input_tensors(new_node_names[i][2], 0,
-                                                    tensor=embedding_ln_out_tensor, mode='modify')
-        return model
+        for i in range(len(pattern_mapping_config['RestoreHiddenStatesInLengthAdaptiveUpdateIndices'])):
+            pattern_dict = pattern_mapping_config['RestoreHiddenStatesInLengthAdaptiveUpdateIndices'][i]
+            model, new_node_names, ret_old_nodes = util.pattern_mapping(
+                'RestoreHiddenStatesInLengthAdaptiveUpdateIndices', pattern_dict, model)
+            if len(new_node_names) != 0:
+                for i in range(len(new_node_names)):
+                    attr = OrderedDict()
+                    input_indices = []
+                    unsqueeze_node = ret_old_nodes[i][0]
+                    input_indices.append(int(unsqueeze_node.attr['axes']))
+                    se_attr = ret_old_nodes[i][1].attr
+                    _set_attr(input_indices, se_attr, new_node_names[i], model)
+                    # the first scatter elements operation need the output of embedding layer norm
+                    # but its output shape is [bsxseq_len, hidden_size]
+                    # so the first scatter node need modify this tensor to 3d tensor
+                    # whose shape is [bs, seq_len, hidden_size]
+                    reshape_3d_node = model.get_node_by_name(new_node_names[i][0])
+                    embedding_ln_out_tensor = copy.deepcopy(reshape_3d_node.output_tensors[0])
+                    scatter_node = model.get_node_by_name(new_node_names[i][2])
+                    # check if one input tensor is from embedding_layer_norm node
+                    if scatter_node.input_tensors[0].name == reshape_3d_node.input_tensors[0].name:
+                        model.change_node_input_tensors(new_node_names[i][2], 0,
+                                                        tensor=embedding_ln_out_tensor, mode='modify')
+                return model

return model
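The in-code comment explains why the new pattern emits a Reshape before the first ScatterElements: the embedding LayerNorm output arrives flattened as [bs*seq_len, hidden_size]. A toy NumPy illustration of that shape fix, with shapes invented for the example:

```python
import numpy as np

bs, seq_len, hidden_size = 2, 4, 8
flat = np.zeros((bs * seq_len, hidden_size), dtype=np.float32)  # LayerNorm output
three_d = flat.reshape(bs, seq_len, hidden_size)  # what the inserted Reshape does
assert three_d.shape == (bs, seq_len, hidden_size)
```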
@@ -37,6 +37,8 @@ void GatherElementsOperator::Reshape(const vector<Tensor*>& input, const vector<
auto& input_data_dtype = data->dtype();
dst_tensor_ptr->set_shape(dst_shape_);
dst_tensor_ptr->set_dtype(input_data_dtype);
+  outer_ = 1;
+  inner_ = 1;
for (int i = 0; i < input[0]->shape().size(); i++) {
if (i < axis_) outer_ *= input[0]->shape()[i];
if (i > axis_) inner_ *= input[0]->shape()[i];
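The C++ change resets `outer_` and `inner_` at the top of `Reshape`; without it, the `*=` accumulation in the loop carries stale values across repeated calls on the same operator instance. A toy Python analogy of the bug and the fix:

```python
class GatherElementsToy:
    """Toy model of the operator's stride bookkeeping."""
    def __init__(self, axis):
        self.axis = axis
        self.outer_ = 1
        self.inner_ = 1

    def reshape(self, shape):
        self.outer_ = 1  # the fix: re-initialize before accumulating
        self.inner_ = 1
        for i, dim in enumerate(shape):
            if i < self.axis:
                self.outer_ *= dim
            if i > self.axis:
                self.inner_ *= dim

op = GatherElementsToy(axis=1)
op.reshape([2, 3, 4])
assert (op.outer_, op.inner_) == (2, 4)
op.reshape([2, 3, 4])  # without the reset, this second call would give (4, 16)
assert (op.outer_, op.inner_) == (2, 4)
```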
@@ -76,7 +76,7 @@ def test_attention_reshape_0(self):
Tensor(name='unsqueeze_input1', data=np.array(1), shape=[1])]
output_tensors = [Tensor(name='unsqueeze_output', source_op=['unsqueeze'], dest_op=['expand'])]
unsqueeze_node.construct('unsqueeze', 'Unsqueeze', input_tensors=input_tensors,
-                                output_tensors=output_tensors,attr=OrderedDict({'axis': '1'}))
+                                output_tensors=output_tensors,attr=OrderedDict({'axes': '1'}))

expand_node = OPERATORS['Expand']()
input_tensors = [Tensor(name='unsqueeze_output', source_op=['unsqueeze'], dest_op=['expand']),