diff --git a/src/models/LanguageModel.cc b/src/models/LanguageModel.cc index 0561d5a..a55c7b8 100644 --- a/src/models/LanguageModel.cc +++ b/src/models/LanguageModel.cc @@ -141,12 +141,12 @@ void LanguageModel::initialize_weight(std::vector>& weig } weight_table.push_back(std::move(create_weight(name_gen(OperationType::LmHead, ParameterType::Weight), {_hidden_size, _model_config["vocab_size"]}))); - + _wgt_size = 0; for (auto& wgt : weight_table) { if(_run_single_layer && wgt->get_name() != name_gen(OperationType::LmHead, ParameterType::Weight)) { _wgt_size += ((uint64_t)wgt->get_size()) * _num_layers; - } + } else { _wgt_size += wgt->get_size(); } @@ -166,7 +166,7 @@ void LanguageModel::initialize_model(std::vector>& weigh } std::vector act_dim = {num_tokens, _hidden_size}; std::map qkv_attr = { - {"has_bias", "1"}, + {"has_bias", "1"}, {"input_shape", dims_to_string(act_dim)}, {"weight_shape", dims_to_string({_hidden_size,_qkv_out_dim})}, {"output_shape", dims_to_string({num_tokens, _qkv_out_dim})}}; @@ -213,14 +213,14 @@ void LanguageModel::initialize_model(std::vector>& weigh uint32_t id = tensor->get_id(); _tensor_map[id] = std::move(tensor); } - + std::map empty_attr; for(int l = 0; l < _num_sim_layers; l++) { //QKV Proejction std::string qkv_name = name_gen(LAYER(l), BlockType::Attention, OperationType::QKVGen); uint32_t qkv_weight_id = _wgt_map[name_gen(qkv_name, ParameterType::Weight)]; uint32_t qkv_bias_id = _wgt_map[name_gen(qkv_name, ParameterType::Bias)]; - auto qkv_op = std::make_unique(_config, (Model*) this, qkv_name, qkv_attr); + auto qkv_op = std::make_unique(_config, (Model*) this, qkv_name, qkv_attr, _target_core); qkv_op->add_input(input_id); qkv_op->add_input(qkv_weight_id); qkv_op->add_input(qkv_bias_id); @@ -229,7 +229,7 @@ void LanguageModel::initialize_model(std::vector>& weigh register_operation(std::move(qkv_op)); //KV Cache auto kv_cache_op = std::make_unique( - _config, (Model*) this, name_gen(LAYER(l), BlockType::Attention, OperationType::KVCacheConcat), kv_concat_attr); + _config, (Model*) this, name_gen(LAYER(l), BlockType::Attention, OperationType::KVCacheConcat), kv_concat_attr, _target_core); kv_cache_op->add_input(qkv_output_id); for(int b = 0; b < _num_batch; b++) { uint32_t key_cache_id = load_key_cache(l, b); @@ -252,7 +252,7 @@ void LanguageModel::initialize_model(std::vector>& weigh for(int b = 0; b < _num_batch; b++) { std::string attn_name = name_gen(LAYER(l), BlockType::Attention, OperationType::Attention, std::to_string(b)); attention_attr["num_tokens"] = std::to_string(input_lengthes[b]); - auto attn_op = std::make_unique(_config, (Model*) this, attn_name, attention_attr); + auto attn_op = std::make_unique(_config, (Model*) this, attn_name, attention_attr, _target_core); attn_op->add_input(queries[b]); attn_op->add_input(keys[b]); attn_op->add_input(values[b]); @@ -263,7 +263,7 @@ void LanguageModel::initialize_model(std::vector>& weigh } //Concatenate attention outputs std::string attn_concat_name = name_gen(LAYER(l), BlockType::Attention, OperationType::AttentionConcat); - auto attn_concat_op = std::make_unique(_config, (Model*) this, attn_concat_name, concat_attr); + auto attn_concat_op = std::make_unique(_config, (Model*) this, attn_concat_name, concat_attr, _target_core); for(int b = 0; b < _num_batch; b++) { attn_concat_op->add_input(attention_outs[b]); } @@ -274,7 +274,7 @@ void LanguageModel::initialize_model(std::vector>& weigh std::string proj_name = name_gen(LAYER(l), BlockType::Attention, OperationType::Projection); uint32_t 
proj_weight_id = _wgt_map[name_gen(proj_name, ParameterType::Weight)]; uint32_t proj_bias_id = _wgt_map[name_gen(proj_name, ParameterType::Bias)]; - auto proj_op = std::make_unique(_config, (Model*) this, proj_name, proj_attr); + auto proj_op = std::make_unique(_config, (Model*) this, proj_name, proj_attr, _target_core); proj_op->add_input(attn_concat_output_id); proj_op->add_input(proj_weight_id); proj_op->add_input(proj_bias_id); @@ -285,7 +285,7 @@ void LanguageModel::initialize_model(std::vector>& weigh std::string ln_name = name_gen(LAYER(l), BlockType::Attention, OperationType::LayerNorm); uint32_t ln_weight_id = _wgt_map[name_gen(ln_name, ParameterType::Weight)]; uint32_t ln_bias_id = _wgt_map[name_gen(ln_name, ParameterType::Bias)]; - auto ln_op = std::make_unique(_config, (Model*) this, ln_name, empty_attr); + auto ln_op = std::make_unique(_config, (Model*) this, ln_name, empty_attr, _target_core); ln_op->add_input(input_id); ln_op->add_input(proj_output_id); ln_op->add_input(ln_weight_id); @@ -298,7 +298,7 @@ void LanguageModel::initialize_model(std::vector>& weigh uint32_t ffn1_weight_id = _wgt_map[name_gen(ffn_name, OperationType::FullyConnected1, ParameterType::Weight)]; uint32_t ffn1_bias_id = _wgt_map[name_gen(ffn_name, OperationType::FullyConnected1, ParameterType::Bias)]; auto ffn1_op = std::make_unique( - _config, (Model*) this, name_gen(ffn_name, OperationType::FullyConnected1), ffn1_attr); + _config, (Model*) this, name_gen(ffn_name, OperationType::FullyConnected1), ffn1_attr, _target_core); ffn1_op->add_input(ln_output_id); ffn1_op->add_input(ffn1_weight_id); ffn1_op->initialize_tiles(_mapping_table); @@ -306,7 +306,7 @@ void LanguageModel::initialize_model(std::vector>& weigh register_operation(std::move(ffn1_op)); //Gelu std::string act_name = name_gen(LAYER(l), BlockType::FeedForward, OperationType::Act); - auto act_op = std::make_unique(_config, (Model*) this, act_name, bias_act_attr); + auto act_op = std::make_unique(_config, (Model*) this, act_name, bias_act_attr, _target_core); act_op->add_input(ffn1_output_id); act_op->add_input(ffn1_bias_id); act_op->initialize_tiles(_mapping_table); @@ -316,7 +316,7 @@ void LanguageModel::initialize_model(std::vector>& weigh uint32_t ffn2_weight_id = _wgt_map[name_gen(ffn_name, OperationType::FullyConnected2, ParameterType::Weight)]; uint32_t ffn2_bias_id = _wgt_map[name_gen(ffn_name, OperationType::FullyConnected2, ParameterType::Bias)]; auto ffn2_op = std::make_unique( - _config, (Model*) this, name_gen(ffn_name, OperationType::FullyConnected2), ffn2_attr); + _config, (Model*) this, name_gen(ffn_name, OperationType::FullyConnected2), ffn2_attr, _target_core); ffn2_op->add_input(act_output_id); ffn2_op->add_input(ffn2_weight_id); ffn2_op->add_input(ffn2_bias_id); @@ -327,7 +327,7 @@ void LanguageModel::initialize_model(std::vector>& weigh std::string ff_ln_name = name_gen(LAYER(l), BlockType::FeedForward, OperationType::LayerNorm); uint32_t ff_ln_weight_id = _wgt_map[name_gen(ff_ln_name, ParameterType::Weight)]; uint32_t ff_ln_bias_id = _wgt_map[name_gen(ff_ln_name, ParameterType::Bias)]; - auto ff_ln_op = std::make_unique(_config, (Model*) this, ff_ln_name, empty_attr); + auto ff_ln_op = std::make_unique(_config, (Model*) this, ff_ln_name, empty_attr, _target_core); ff_ln_op->add_input(ln_output_id); ff_ln_op->add_input(ffn2_output_id); ff_ln_op->add_input(ff_ln_weight_id); @@ -342,7 +342,7 @@ void LanguageModel::initialize_model(std::vector>& weigh if(val->check_executable()) { spdlog::debug("runnable op, {}", 
val->get_optype()); _executable_layer.push_back(val.get()); - } + } } /* Model initialization time measurement */ auto end = std::chrono::high_resolution_clock::now(); diff --git a/src/operations/AdaptiveAvgPool.cc b/src/operations/AdaptiveAvgPool.cc index e05747d..7b13fe9 100644 --- a/src/operations/AdaptiveAvgPool.cc +++ b/src/operations/AdaptiveAvgPool.cc @@ -4,8 +4,8 @@ #include "../Tensor.h" AdaptiveAvgPool::AdaptiveAvgPool(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { int kernel_dim = 0; for (auto attribute : node_proto.attribute()) { if (attribute.name() == "kernel_shape") { diff --git a/src/operations/AdaptiveAvgPool.h b/src/operations/AdaptiveAvgPool.h index 0da4d36..4d2b012 100644 --- a/src/operations/AdaptiveAvgPool.h +++ b/src/operations/AdaptiveAvgPool.h @@ -5,7 +5,7 @@ class AdaptiveAvgPool : public Operation { public: AdaptiveAvgPool(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto); + onnx::NodeProto& node_proto, uint32_t target_core=0); AdaptiveAvgPool(const AdaptiveAvgPool& src); virtual void initialize_tiles(MappingTable& mapping_table) override; diff --git a/src/operations/Attention.cc b/src/operations/Attention.cc index 799939d..3688a44 100644 --- a/src/operations/Attention.cc +++ b/src/operations/Attention.cc @@ -5,8 +5,8 @@ #include "Softmax.h" Attention::Attention(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { onnx = true; for (auto attribute : node_proto.attribute()) { if (attribute.name() == "num_heads") { @@ -62,8 +62,8 @@ Attention::Attention(SimulationConfig config, Model* model, } Attention::Attention(SimulationConfig config, Model* model, - std::string name, std::map& attributes) - :Operation(config, model, name, attributes) { + std::string name, std::map& attributes, uint32_t target_core) + :Operation(config, model, name, attributes, target_core) { _batch_size = 1; _q_len = std::stoi(get_attribute("num_tokens")); _nh = std::stoi(get_attribute("num_heads")); @@ -138,7 +138,7 @@ void Attention::initialize_tiles(MappingTable& mapping_table) { float kv_mem = _seq * _dk * _nkvh * 2 * _config.precision / (float) 1e9; //GB float q_mem = _q_len * _dk * _nh * 2 * _config.precision / (float) 1e9; //GB float total_mem = kv_mem + q_mem; - float compute_time = (qk_flops + kv_flops) / _config.max_systolic_flops(0) * 1e3; + float compute_time = (qk_flops + kv_flops) / _config.max_systolic_flops(target_core) * 1e3; compute_time += softmax_flops / _config.max_vector_flops(target_core) * 1e3; float mem_time = total_mem / _config.max_dram_bandwidth() * 1e3; float total_time = std::max(compute_time, mem_time); @@ -146,9 +146,9 @@ void Attention::initialize_tiles(MappingTable& mapping_table) { spdlog::info("[Attention] Theoretical time(ms): {} Compute time: {} Memory time: {}", total_time, compute_time, mem_time); spdlog::info("[Attention] QK compute {:.4f}ms Softmax compute {:.4f}ms SV compute {:.4f}ms", - qk_flops / _config.max_systolic_flops(0) * 1e3, + qk_flops / _config.max_systolic_flops(target_core) * 1e3, softmax_flops / _config.max_vector_flops(target_core) * 1e3, - kv_flops / _config.max_systolic_flops(0) * 1e3); + kv_flops / _config.max_systolic_flops(target_core) * 1e3); } void 
Attention::initialize_onnx_tiles(MappingTable& mapping_table) { @@ -161,7 +161,7 @@ void Attention::initialize_onnx_tiles(MappingTable& mapping_table) { /* Create linear node and tensors */ uint32_t fused_op_id = 0; - _projection_node = new GemmWS(_config, mapping_table, _input_shape, _weight_shape, _liner_output_shape); + _projection_node = new GemmWS(_config, mapping_table, _input_shape, _weight_shape, _liner_output_shape, target_core); std::unique_ptr key_projection = std::make_unique( _id, "", _projection_output_shape, _config.precision, false); std::unique_ptr query_projection = std::make_unique( @@ -591,7 +591,7 @@ void Attention::initialize_non_fused_tiles(MappingTable& mapping_table) { for (int req_idx = 0; req_idx < _batch_size; req_idx++) { for (int head_off=0; head_off<_nh; head_off++) { /* Key query matmul */ - GemmWS key_query = GemmWS(_config, mapping_table, single_head_query_shape, single_head_key_shape, query_key_shape); + GemmWS key_query = GemmWS(_config, mapping_table, single_head_query_shape, single_head_key_shape, query_key_shape, target_core); /* Todo. dram addr */ key_query.has_bias = false; key_query.initialize_tiles(mapping_table); @@ -624,7 +624,7 @@ void Attention::initialize_non_fused_tiles(MappingTable& mapping_table) { _tiles.push_back(std::make_unique(Tile{.status = Tile::Status::BAR, .layer_id = _id})); /* attention x value */ - GemmWS attention = GemmWS(_config, mapping_table, query_key_shape, single_head_value_shape, single_output_shape); + GemmWS attention = GemmWS(_config, mapping_table, query_key_shape, single_head_value_shape, single_output_shape, target_core); /* Todo. dram addr */ attention.has_bias = false; attention.initialize_tiles(mapping_table); diff --git a/src/operations/Attention.h b/src/operations/Attention.h index 6b53a5c..08b2629 100644 --- a/src/operations/Attention.h +++ b/src/operations/Attention.h @@ -5,8 +5,8 @@ class Attention : public Operation { public: - Attention(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); - Attention(SimulationConfig config, Model* model, std::string name, std::map& attributes); + Attention(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); + Attention(SimulationConfig config, Model* model, std::string name, std::map& attributes, uint32_t target_core=0); //std::vector> get_outputs(std::vector> inputs) override; uint32_t _batch_size; diff --git a/src/operations/BiasAct.cc b/src/operations/BiasAct.cc index 2d4ef76..610e7b7 100644 --- a/src/operations/BiasAct.cc +++ b/src/operations/BiasAct.cc @@ -9,8 +9,8 @@ static const std::map activation_map = { }; BiasAct::BiasAct(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { /* Load weight info from node */ _input_shape = get_input(0)->get_dims(); @@ -34,8 +34,8 @@ BiasAct::BiasAct(SimulationConfig config, Model* model, } BiasAct::BiasAct(SimulationConfig config, Model* model, - std::string name, std::map &attributes) - : Operation(config, model, name, attributes) { + std::string name, std::map &attributes, uint32_t target_core) + : Operation(config, model, name, attributes, target_core) { _activation = activation_map.at(get_attribute("activation")); _use_bias = std::stoi(get_attribute("has_bias")); _llama_mlp = std::stoi(get_attribute("llama_mlp")); diff --git a/src/operations/BiasAct.h b/src/operations/BiasAct.h index eac2dc7..3795e33 
100644 --- a/src/operations/BiasAct.h +++ b/src/operations/BiasAct.h @@ -3,9 +3,9 @@ class BiasAct : public Operation { public: - BiasAct(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + BiasAct(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); BiasAct(SimulationConfig config, Model* model, std::string name, - std::map& attributes); + std::map& attributes, uint32_t target_core=0); void initialize_tiles(MappingTable& mapping_table) override; diff --git a/src/operations/BiasGelu.cc b/src/operations/BiasGelu.cc index 03ac27c..dfef354 100644 --- a/src/operations/BiasGelu.cc +++ b/src/operations/BiasGelu.cc @@ -2,8 +2,8 @@ #include "../Model.h" BiasGelu::BiasGelu(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { /* Load weight info from node */ _input_shape = get_input(0)->get_dims(); @@ -28,8 +28,8 @@ BiasGelu::BiasGelu(SimulationConfig config, Model* model, } BiasGelu::BiasGelu(SimulationConfig config, Model* model, - std::string name, std::map &attributes) - : Operation(config, model, name, attributes) { + std::string name, std::map &attributes, uint32_t target_core) + : Operation(config, model, name, attributes, target_core) { //TODO:implement this } diff --git a/src/operations/BiasGelu.h b/src/operations/BiasGelu.h index b74667b..851cec3 100644 --- a/src/operations/BiasGelu.h +++ b/src/operations/BiasGelu.h @@ -3,8 +3,8 @@ class BiasGelu : public Operation { public: - BiasGelu(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); - BiasGelu(SimulationConfig config, Model* model, std::string name, std::map& attributes); + BiasGelu(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); + BiasGelu(SimulationConfig config, Model* model, std::string name, std::map& attributes, uint32_t target_core=0); std::vector _bias_shape; diff --git a/src/operations/Concat.cc b/src/operations/Concat.cc index db28e64..b5b900d 100644 --- a/src/operations/Concat.cc +++ b/src/operations/Concat.cc @@ -5,8 +5,8 @@ #include "../Tensor.h" Concat::Concat(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { for (auto attribute : node_proto.attribute()) { if (attribute.name() == "axis") { spdlog::trace("concat axis {}", attribute.ints(0)); @@ -45,8 +45,8 @@ Concat::Concat(const Concat& src) : Operation(src) { } Concat::Concat(SimulationConfig config, Model* model, - std::string name, std::map &attributes) - : Operation(config, model, name, attributes) { + std::string name, std::map &attributes, uint32_t target_core) + : Operation(config, model, name, attributes, target_core) { //TODO:implement this _axis = std::stoi(get_attribute("axis")); } diff --git a/src/operations/Concat.h b/src/operations/Concat.h index 08f369a..5bbe65b 100644 --- a/src/operations/Concat.h +++ b/src/operations/Concat.h @@ -5,10 +5,10 @@ class Concat : public Operation { public: - Concat(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Concat(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Concat(const Concat& src); Concat(SimulationConfig config, Model* model, std::string name, - std::map& attributes); + std::map& attributes, 
uint32_t target_core=0); virtual void initialize_tiles(MappingTable& mapping_table) override; virtual void initialize_instructions(Tile* tile, Mapping mapping) override; protected: diff --git a/src/operations/Conv.cc b/src/operations/Conv.cc index dc523a8..063a5f4 100644 --- a/src/operations/Conv.cc +++ b/src/operations/Conv.cc @@ -5,8 +5,8 @@ #include "../Model.h" #include "../Tensor.h" -Conv::Conv(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { +Conv::Conv(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { int kernel_dim = 0; _activation_fused = false; _pool_fused = false; @@ -210,8 +210,8 @@ void Conv::im2col_nhwc() { } Conv::Conv(SimulationConfig config, MappingTable& mapping_table, - convInfo info) - : Operation(config, mapping_table){ + convInfo info, uint32_t target_core) + : Operation(config, mapping_table, target_core){ _kernel_shape = info.kernel_shape; _strides = info.strides; _dilations = info.dilations; diff --git a/src/operations/Conv.h b/src/operations/Conv.h index 3fbe9d1..dbeac47 100644 --- a/src/operations/Conv.h +++ b/src/operations/Conv.h @@ -25,13 +25,13 @@ struct convInfo{ class Conv : public Operation { public: - Conv(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Conv(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Conv(const Conv& src); - Conv(SimulationConfig config, MappingTable& mapping_table, convInfo info); + Conv(SimulationConfig config, MappingTable& mapping_table, convInfo info, uint32_t target_core=0); // virtual void initialize_tiles(MappingTable& mapping_table) override; protected: virtual void im2col_nhwc(); - // void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + // void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); protected: std::vector _kernel_shape; diff --git a/src/operations/ConvOS.cc b/src/operations/ConvOS.cc index a5f19ed..e28f1c9 100644 --- a/src/operations/ConvOS.cc +++ b/src/operations/ConvOS.cc @@ -6,8 +6,8 @@ #include "../Tensor.h" ConvOS::ConvOS(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Conv(config, model, node_proto) {} + onnx::NodeProto& node_proto, uint32_t target_core) + : Conv(config, model, node_proto, target_core) {} ConvOS::ConvOS(const Conv& src) : Conv(src) {} @@ -28,7 +28,8 @@ void ConvOS::initialize_tiles(MappingTable& mapping_table) { .S = weight_shape[Sdim], .R = weight_shape[Rdim], .Q = output_shape[Hdim], - .P = output_shape[Wdim]}; + .P = output_shape[Wdim], + .target_core = target_core}; Mapping mapping; try { mapping = mapping_table.at(key); diff --git a/src/operations/ConvOS.h b/src/operations/ConvOS.h index cbbec27..27b2345 100644 --- a/src/operations/ConvOS.h +++ b/src/operations/ConvOS.h @@ -4,11 +4,11 @@ class ConvOS : public Conv { public: - ConvOS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + ConvOS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); ConvOS(const Conv& src); virtual void initialize_tiles(MappingTable& mapping_table) override; protected: virtual void initialize_instructions(Tile* tile, Mapping mapping) ; - void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); }; \ No 
newline at end of file diff --git a/src/operations/ConvWS.cc b/src/operations/ConvWS.cc index 087b800..3b74080 100644 --- a/src/operations/ConvWS.cc +++ b/src/operations/ConvWS.cc @@ -4,8 +4,8 @@ #include "../Tensor.h" ConvWS::ConvWS(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Conv(config, model, node_proto) {} + onnx::NodeProto& node_proto, uint32_t target_core) + : Conv(config, model, node_proto, target_core) {} ConvWS::ConvWS(const Conv& src) : Conv(src) {} @@ -50,7 +50,8 @@ void ConvWS::initialize_tiles(MappingTable& mapping_table) { .Q = output_shape[Hdim], .P = output_shape[Wdim], .Padding = _pads.at(0), - .Stride = _strides.at(0)}; + .Stride = _strides.at(0), + .target_core = target_core}; Mapping mapping; try { mapping = mapping_table.at(key); diff --git a/src/operations/ConvWS.h b/src/operations/ConvWS.h index 39e4e35..8c51a32 100644 --- a/src/operations/ConvWS.h +++ b/src/operations/ConvWS.h @@ -4,7 +4,7 @@ class ConvWS : public Conv { public: - ConvWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + ConvWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); ConvWS(const Conv& src); ConvWS(SimulationConfig config, MappingTable& mapping_table, convInfo info); virtual void initialize_tiles(MappingTable& mapping_table) override; @@ -16,7 +16,7 @@ class ConvWS : public Conv { std::vector shape); virtual addr_type make_activation_address(uint32_t N, uint32_t H, uint32_t W, uint32_t C, std::vector shape); - void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + void init(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Instruction make_weight_instruction(int m_offset, int s_offset, int r_offset, int c_offset, Mapping mapping); Instruction make_input_instruction(int m_offset, int s_offset, int r_offset, diff --git a/src/operations/Dummy.cc b/src/operations/Dummy.cc index 34973eb..31b3013 100644 --- a/src/operations/Dummy.cc +++ b/src/operations/Dummy.cc @@ -2,8 +2,8 @@ #include "../Model.h" #include "../Tensor.h" -Dummy::Dummy(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { +Dummy::Dummy(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { _input_shape = get_input(0)->get_dims(); _output_shape = _input_shape; spdlog::trace("output_shape : {}", _output_shape); diff --git a/src/operations/Dummy.h b/src/operations/Dummy.h index 50bdae0..dcc2515 100644 --- a/src/operations/Dummy.h +++ b/src/operations/Dummy.h @@ -5,7 +5,7 @@ class Dummy: public Operation { public: - Dummy(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Dummy(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); std::vector _input_shape; std::vector _output_shape; diff --git a/src/operations/EmbedLayerNorm.cc b/src/operations/EmbedLayerNorm.cc index d523b0a..44c88b1 100644 --- a/src/operations/EmbedLayerNorm.cc +++ b/src/operations/EmbedLayerNorm.cc @@ -2,8 +2,8 @@ #include "../Model.h" #include "../Tensor.h" -EmbedLayerNorm::EmbedLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { +EmbedLayerNorm::EmbedLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { _input_shape = 
get_input(0)->get_dims(); _weight_shape = get_input(2)->get_dims(); diff --git a/src/operations/EmbedLayerNorm.h b/src/operations/EmbedLayerNorm.h index ac85cd9..adea7f7 100644 --- a/src/operations/EmbedLayerNorm.h +++ b/src/operations/EmbedLayerNorm.h @@ -4,7 +4,7 @@ class EmbedLayerNorm: public Operation { public: - EmbedLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + EmbedLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); std::vector _input_shape; std::vector _output_shape; diff --git a/src/operations/Flatten.cc b/src/operations/Flatten.cc index 3d73bd6..669e73c 100644 --- a/src/operations/Flatten.cc +++ b/src/operations/Flatten.cc @@ -5,8 +5,8 @@ #include "../Tensor.h" Flatten::Flatten(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { for (auto attribute : node_proto.attribute()) { if (attribute.name() == "axis") { spdlog::trace("flatten axis {}", attribute.i()); diff --git a/src/operations/Flatten.h b/src/operations/Flatten.h index 46b9369..03b37ce 100644 --- a/src/operations/Flatten.h +++ b/src/operations/Flatten.h @@ -5,7 +5,7 @@ class Flatten : public Operation { public: - Flatten(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Flatten(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Flatten(const Flatten& src); virtual void initialize_tiles(MappingTable& mapping_table) override; virtual void initialize_instructions(Tile* tile, Mapping mapping) override; diff --git a/src/operations/Gemm.cc b/src/operations/Gemm.cc index d8edbc6..69fa41b 100644 --- a/src/operations/Gemm.cc +++ b/src/operations/Gemm.cc @@ -3,8 +3,8 @@ #include "../Model.h" #include "../Tensor.h" -Gemm::Gemm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { +Gemm::Gemm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { Mdim = 1; Cdim_w = 0; Cdim = 1; @@ -63,8 +63,8 @@ Gemm::Gemm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) Gemm::Gemm(SimulationConfig config, MappingTable& mapping_table, std::vector input_shape, std::vector weight_shape, - std::vector output_shape) - : Operation(config, mapping_table) { + std::vector output_shape, uint32_t target_core) + : Operation(config, mapping_table, target_core) { Mdim = 1; Cdim_w = 0; Cdim = 1; @@ -82,8 +82,8 @@ Gemm::Gemm(SimulationConfig config, MappingTable& mapping_table, } Gemm::Gemm(SimulationConfig config, Model* model, std::string name, - std::map &attributes) - :Operation(config, model, name, attributes) { + std::map &attributes, uint32_t target_core) + :Operation(config, model, name, attributes, target_core) { Mdim = 1; Cdim_w = 0; Cdim = 1; diff --git a/src/operations/Gemm.h b/src/operations/Gemm.h index 96e3aa2..f101729 100644 --- a/src/operations/Gemm.h +++ b/src/operations/Gemm.h @@ -3,12 +3,12 @@ class Gemm : public Operation { public: - Gemm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Gemm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Gemm(SimulationConfig config, MappingTable& mapping_table, std::vector output_shape, std::vector input_shape, - std::vector weight_shape); + std::vector 
weight_shape, uint32_t target_core=0); Gemm(SimulationConfig config, Model* model, std::string name, - std::map& attributes); + std::map& attributes, uint32_t target_core=0); protected: diff --git a/src/operations/GemmOS.cc b/src/operations/GemmOS.cc index cfd6818..57fe803 100644 --- a/src/operations/GemmOS.cc +++ b/src/operations/GemmOS.cc @@ -3,8 +3,8 @@ #include "../Model.h" GemmOS::GemmOS(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Gemm(config, model, node_proto) {} + onnx::NodeProto& node_proto, uint32_t target_core) + : Gemm(config, model, node_proto, target_core) {} /* TODO : Implement this */ void GemmOS::initialize_tiles(MappingTable& mapping_table) { diff --git a/src/operations/GemmOS.h b/src/operations/GemmOS.h index aafc9cf..9283099 100644 --- a/src/operations/GemmOS.h +++ b/src/operations/GemmOS.h @@ -3,7 +3,7 @@ class GemmOS : public Gemm { public: - GemmOS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + GemmOS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); void initialize_tiles(MappingTable& mapping_table) override; private: }; \ No newline at end of file diff --git a/src/operations/GemmWS.cc b/src/operations/GemmWS.cc index 2ed7f89..4109154 100644 --- a/src/operations/GemmWS.cc +++ b/src/operations/GemmWS.cc @@ -3,23 +3,24 @@ #include "../Model.h" GemmWS::GemmWS(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Gemm(config, model, node_proto) {} + onnx::NodeProto& node_proto, uint32_t target_core) + : Gemm(config, model, node_proto, target_core) {} -GemmWS::GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, bool has_bias) - : GemmWS(config, model, node_proto) { +GemmWS::GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, bool has_bias, uint32_t target_core) + : GemmWS(config, model, node_proto, target_core) { this->has_bias = has_bias; } GemmWS::GemmWS(SimulationConfig config, MappingTable& mapping_table, std::vector input_shape, std::vector weight_shape, - std::vector output_shape) - : Gemm(config, mapping_table, input_shape, weight_shape, output_shape) {} + std::vector output_shape, + uint32_t target_core) + : Gemm(config, mapping_table, input_shape, weight_shape, output_shape, target_core) {} GemmWS::GemmWS(SimulationConfig config, Model* model, std::string name, - std::map& attributes) - : Gemm(config, model, name, attributes) { + std::map& attributes, uint32_t target_core) + : Gemm(config, model, name, attributes, target_core) { has_bias = std::stoi(get_attribute("has_bias")); } @@ -30,7 +31,8 @@ void GemmWS::initialize_tiles(MappingTable& mapping_table) { .S = 1, .R = 1, .Q = 1, - .P = 1}; + .P = 1, + .target_core = target_core}; Mapping mapping; try { @@ -80,7 +82,7 @@ void GemmWS::initialize_tiles(MappingTable& mapping_table) { total_memory += bias_memory; } spdlog::info("[GemmWS]: total {} GFLOPs, {} GB", total_flops, total_memory); - float theoretical_compute_time = total_flops / _config.max_systolic_flops(0); + float theoretical_compute_time = total_flops / _config.max_systolic_flops(target_core); float theoretical_mem_time = total_memory / _config.max_dram_bandwidth(); float theoretical_time = std::max(theoretical_compute_time, theoretical_mem_time); spdlog::info("[GemmWS]: Theoretical time(ms): {} Compute time: {} Memory time: {}", diff --git a/src/operations/GemmWS.h b/src/operations/GemmWS.h index 547a6e4..e2186cc 100644 --- a/src/operations/GemmWS.h +++ b/src/operations/GemmWS.h @@ -3,12 
+3,12 @@ class GemmWS : public Gemm { public: - GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); - GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, bool has_true); + GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); + GemmWS(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, bool has_true, uint32_t target_core=0); GemmWS(SimulationConfig config, MappingTable& mapping_table, std::vector input_shape, std::vector weight_shape, - std::vector output_shape); - GemmWS(SimulationConfig config, Model* model, std::string name, std::map& attribute); + std::vector output_shape, uint32_t target_core); + GemmWS(SimulationConfig config, Model* model, std::string name, std::map& attribute, uint32_t target_core); virtual void initialize_tiles(MappingTable& mapping_table) override; bool has_bias = true; protected: diff --git a/src/operations/GlobalAvgPool.cc b/src/operations/GlobalAvgPool.cc index 81394f0..0af06f7 100644 --- a/src/operations/GlobalAvgPool.cc +++ b/src/operations/GlobalAvgPool.cc @@ -3,8 +3,8 @@ #include "../Model.h" -GlobalAvgPool::GlobalAvgPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { +GlobalAvgPool::GlobalAvgPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { /* We assume conv2d */ std::vector input_shape = get_input(0)->get_dims(); diff --git a/src/operations/GlobalAvgPool.h b/src/operations/GlobalAvgPool.h index 51fa7a4..082486f 100644 --- a/src/operations/GlobalAvgPool.h +++ b/src/operations/GlobalAvgPool.h @@ -3,7 +3,7 @@ class GlobalAvgPool : public Operation { public: - GlobalAvgPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + GlobalAvgPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); GlobalAvgPool(const GlobalAvgPool& src); virtual void initialize_tiles(MappingTable& mapping_table) override; diff --git a/src/operations/KVCacheConcat.cc b/src/operations/KVCacheConcat.cc index 6c520e2..7533eaf 100644 --- a/src/operations/KVCacheConcat.cc +++ b/src/operations/KVCacheConcat.cc @@ -7,8 +7,8 @@ // // head_size, block_size] // const int num_kv_heads, // [num_heads] KVCacheConcat::KVCacheConcat(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { spdlog::error("KVCacheConcat: Not implemented"); throw std::runtime_error("KVCacheConcat: Not implemented"); } @@ -20,8 +20,8 @@ KVCacheConcat::KVCacheConcat(const KVCacheConcat& src) : Operation(src) { KVCacheConcat::KVCacheConcat(SimulationConfig config, Model* model, std::string name, - std::map& attributes) - : Operation(config, model, name, attributes) { + std::map& attributes, uint32_t target_core) + : Operation(config, model, name, attributes, target_core) { _input_token_lengths = parse_dims(get_attribute("input_token_lengths")); _num_kv_heads = std::stoi(get_attribute("num_kv_heads")); _num_attention_heads = std::stoi(get_attribute("num_heads")); diff --git a/src/operations/KVCacheConcat.h b/src/operations/KVCacheConcat.h index 7178740..f6b1484 100644 --- a/src/operations/KVCacheConcat.h +++ b/src/operations/KVCacheConcat.h @@ -5,10 +5,10 @@ class KVCacheConcat : public Operation { public: 
KVCacheConcat(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto); + onnx::NodeProto& node_proto, uint32_t target_core=0); KVCacheConcat(const KVCacheConcat& src); KVCacheConcat(SimulationConfig config, Model* model, std::string name, - std::map& attributes); + std::map& attributes, uint32_t target_core=0); void initialize_tiles(MappingTable& mapping_table) override; private: void calculate_loops(); diff --git a/src/operations/MaxPool.cc b/src/operations/MaxPool.cc index 2b3f5ba..62108a1 100644 --- a/src/operations/MaxPool.cc +++ b/src/operations/MaxPool.cc @@ -6,8 +6,8 @@ #include "../Tensor.h" MaxPool::MaxPool(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { int kernel_dim = 0; for (auto attribute : node_proto.attribute()) { if (attribute.name() == "kernel_shape") { diff --git a/src/operations/MaxPool.h b/src/operations/MaxPool.h index 91afaf1..cd43a6e 100644 --- a/src/operations/MaxPool.h +++ b/src/operations/MaxPool.h @@ -4,7 +4,7 @@ class MaxPool : public Operation { public: - MaxPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + MaxPool(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); MaxPool(const MaxPool& src); virtual void initialize_tiles(MappingTable& mapping_table) override; virtual void initialize_instructions(Tile* tile, Mapping mapping) override; diff --git a/src/operations/Operation.cc b/src/operations/Operation.cc index 2ecee72..79da101 100644 --- a/src/operations/Operation.cc +++ b/src/operations/Operation.cc @@ -3,7 +3,7 @@ #include "../Model.h" Operation::Operation(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) { _id = generate_id(); _model = model; _optype = node_proto.op_type(); @@ -11,6 +11,7 @@ Operation::Operation(SimulationConfig config, Model* model, _proto = node_proto; _finish = false; _config = config; + this->target_core = target_core; spdlog::trace("Node {} op_type {}", _name.c_str(), _optype.c_str()); for (std::string input_proto : node_proto.input()) { /* Skip none input */ @@ -44,10 +45,11 @@ Operation::Operation(SimulationConfig config, Model* model, Rdim = 3; } -Operation::Operation(SimulationConfig config, MappingTable& mapping_table) { +Operation::Operation(SimulationConfig config, MappingTable& mapping_table, uint32_t target_core) { _id = generate_id(); _finish = false; _config = config; + this->target_core = target_core; spdlog::trace("Node {} op_type {}", _name.c_str(), _optype.c_str()); if (_config.layout == "NCHW") { Ndim = 0; @@ -67,7 +69,7 @@ Operation::Operation(SimulationConfig config, MappingTable& mapping_table) { } Operation::Operation(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto, uint32_t id) { + onnx::NodeProto& node_proto, uint32_t id, uint32_t target_core) { _id = id; _model = model; _optype = node_proto.op_type(); @@ -75,6 +77,7 @@ Operation::Operation(SimulationConfig config, Model* model, _proto = node_proto; _finish = false; _config = config; + this->target_core = target_core; spdlog::trace("Node {} op_type {}", _name.c_str(), _optype.c_str()); for (std::string input_proto : node_proto.input()) { Tensor* input_tensor = _model->find_tensor(input_proto); @@ -90,10 +93,11 @@ Operation::Operation(SimulationConfig config, Model* model, } Operation::Operation(SimulationConfig 
config, Model* model, - std::string name, std::map&attribute) + std::string name, std::map&attribute, uint32_t target_core) : _config(config), _model(model) ,_name(name), _attributes(attribute) { _id = generate_id(); _finish = false; + this->target_core = target_core; Ndim = 0; Cdim = 3; Hdim = 1; diff --git a/src/operations/Operation.h b/src/operations/Operation.h index c812c01..404eee7 100644 --- a/src/operations/Operation.h +++ b/src/operations/Operation.h @@ -11,12 +11,12 @@ class OpParser; class Operation { public: Operation(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, - uint32_t id); - Operation(SimulationConfig config, MappingTable& mapping_table); - Operation(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + uint32_t id, uint32_t target_core); + Operation(SimulationConfig config, MappingTable& mapping_table, uint32_t target_core); + Operation(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core); Operation(const Operation& operation); Operation(SimulationConfig config, Model* model, - std::string name, std::map&attribute); + std::string name, std::map&attribute, uint32_t target_core); virtual ~Operation() = default; virtual void set_finish(); diff --git a/src/operations/OperationFactory.cc b/src/operations/OperationFactory.cc index 518baf0..bee3235 100644 --- a/src/operations/OperationFactory.cc +++ b/src/operations/OperationFactory.cc @@ -22,43 +22,43 @@ SimulationConfig OperationFactory::_config = SimulationConfig(); void OperationFactory::initialize(SimulationConfig config) { _config = config; } std::unique_ptr OperationFactory::create_operation( - Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0) { + Model* model, onnx::NodeProto& node_proto, uint32_t target_core) { if (node_proto.op_type() == "Conv" || node_proto.op_type() == "FusedConv") { if (_config.core_config[target_core].core_type == CoreType::SYSTOLIC_OS) - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); else if (_config.core_config[target_core].core_type == CoreType::SYSTOLIC_WS) - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "Gemm" || node_proto.op_type() == "FusedGemm") { if (_config.core_config[target_core].core_type == CoreType::SYSTOLIC_WS) - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "MatMul") { - return std::make_unique(_config, model, node_proto, false); + return std::make_unique(_config, model, node_proto, false, target_core); } else if (node_proto.op_type() == "MaxPool") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "GlobalAveragePool") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "AdaptiveAveragePool" || node_proto.op_type() == "AveragePool") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "Flatten") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "Attention") { - return 
std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "Cast") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "EmbedLayerNormalization") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "SkipLayerNormalization") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "BiasGelu" || node_proto.op_type() == "FastGelu") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } else if (node_proto.op_type() == "ReorderOutput") { - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } spdlog::warn("Node Proto optype \"{}\" returned dummy operator!", node_proto.op_type().c_str()); - return std::make_unique(_config, model, node_proto); + return std::make_unique(_config, model, node_proto, target_core); } std::unique_ptr OperationFactory::copy_operation(Operation* op) { diff --git a/src/operations/OperationFactory.h b/src/operations/OperationFactory.h index 822b7f6..46cee12 100644 --- a/src/operations/OperationFactory.h +++ b/src/operations/OperationFactory.h @@ -7,7 +7,7 @@ class Model; class OperationFactory { public: static void initialize(SimulationConfig config); - static std::unique_ptr create_operation(Model* model, onnx::NodeProto& node_proto, uint32_t target_core); + static std::unique_ptr create_operation(Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); static std::unique_ptr copy_operation(Operation* op); private: diff --git a/src/operations/SkipLayerNorm.cc b/src/operations/SkipLayerNorm.cc index 179042d..728488d 100644 --- a/src/operations/SkipLayerNorm.cc +++ b/src/operations/SkipLayerNorm.cc @@ -2,8 +2,8 @@ #include "../Model.h" SkipLayerNorm::SkipLayerNorm(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { /* Load weight info from node */ _input_shape = get_input(0)->get_dims(); @@ -36,8 +36,8 @@ SkipLayerNorm::SkipLayerNorm(SimulationConfig config, Model* model, } SkipLayerNorm::SkipLayerNorm(SimulationConfig config, Model *model, - std::string name, std::map& attributes) - : Operation(config, model, name, attributes) { + std::string name, std::map& attributes, uint32_t target_core) + : Operation(config, model, name, attributes, target_core) { //TODO: } diff --git a/src/operations/SkipLayerNorm.h b/src/operations/SkipLayerNorm.h index c5d37b0..453a301 100644 --- a/src/operations/SkipLayerNorm.h +++ b/src/operations/SkipLayerNorm.h @@ -3,8 +3,8 @@ class SkipLayerNorm : public Operation { public: - SkipLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); - SkipLayerNorm(SimulationConfig config, Model* model, std::string name, std::map& attributes); + SkipLayerNorm(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); + SkipLayerNorm(SimulationConfig config, Model* model, std::string name, std::map& attributes, uint32_t target_core=0); std::vector _weight_shape; std::vector 
_bias_shape; diff --git a/src/operations/Softmax.cc b/src/operations/Softmax.cc index 40b2263..a0d4a55 100644 --- a/src/operations/Softmax.cc +++ b/src/operations/Softmax.cc @@ -2,8 +2,8 @@ #include "../Model.h" Softmax::Softmax(SimulationConfig config, Model* model, - onnx::NodeProto& node_proto) - : Operation(config, model, node_proto) { + onnx::NodeProto& node_proto, uint32_t target_core) + : Operation(config, model, node_proto, target_core) { /* Load weight info from node */ _input_shape = get_input(0)->get_dims(); @@ -31,8 +31,8 @@ Softmax::Softmax(SimulationConfig config, Model* model, } Softmax::Softmax(SimulationConfig config, MappingTable& mapping_table, - std::vector input_shape) - : Operation(config, mapping_table) { + std::vector input_shape, uint32_t target_core) + : Operation(config, mapping_table, target_core) { _input_shape = input_shape; _output_shape = input_shape; diff --git a/src/operations/Softmax.h b/src/operations/Softmax.h index b7fe81d..c3f7bba 100644 --- a/src/operations/Softmax.h +++ b/src/operations/Softmax.h @@ -3,9 +3,9 @@ class Softmax : public Operation { public: - Softmax(SimulationConfig config, Model* model, onnx::NodeProto& node_proto); + Softmax(SimulationConfig config, Model* model, onnx::NodeProto& node_proto, uint32_t target_core=0); Softmax(SimulationConfig config, MappingTable& mapping_table, - std::vector input_shape); + std::vector input_shape, uint32_t target_core=0); std::vector _input_shape; std::vector _output_shape; diff --git a/tests/operatons/GemmWSTest.cc b/tests/operatons/GemmWSTest.cc index 2f47511..8a92a30 100644 --- a/tests/operatons/GemmWSTest.cc +++ b/tests/operatons/GemmWSTest.cc @@ -39,7 +39,7 @@ GemmWS make_GemmWS(SimulationConfig config, std::string mapping_str, uint32_t n, mapping_table[key] = mapping; std::vector output_shape = {1, n, m}; std::vector weight_shape = {c, m}; - GemmWS op(config, mapping_table, input_dims, weight_shape, output_shape); + GemmWS op(config, mapping_table, input_dims, weight_shape, output_shape, 0); op.initialize_tiles(mapping_table); return op; }
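Taken together, the change is mechanical: every Operation subclass constructor gains a trailing uint32_t target_core parameter (defaulted to 0 in the headers so existing ONNX-driven call sites and tests keep compiling), the Operation base class stores it, and per-core lookups such as max_systolic_flops(target_core) and core_config[target_core] replace the previously hard-coded core 0. The sketch below is a minimal, self-contained illustration of that pattern and of the roofline-style time estimate it feeds; it is not the repository's real code. FakeConfig, FakeOperation, FakeGemm, and the numbers in main are hypothetical stand-ins, and only the member name target_core and the shape of the max_systolic_flops / max_dram_bandwidth accessors mirror names that actually appear in the patch.

// Minimal sketch with assumed, simplified stand-ins; not the repository's real classes.
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

struct FakeConfig {                       // stand-in for SimulationConfig
  std::vector<double> systolic_gflops;    // per-core peak systolic throughput (GFLOP/s)
  double dram_gbps;                       // shared DRAM bandwidth (GB/s)
  double max_systolic_flops(uint32_t core) const { return systolic_gflops.at(core); }
  double max_dram_bandwidth() const { return dram_gbps; }
};

class FakeOperation {                     // stand-in for the Operation base class
 public:
  FakeOperation(FakeConfig config, std::string name, uint32_t target_core = 0)
      : _config(std::move(config)), _name(std::move(name)) {
    // Note the this->: the parameter shadows the member, so a bare
    // "target_core = target_core;" would leave the member unset.
    this->target_core = target_core;
  }
  virtual ~FakeOperation() = default;

 protected:
  FakeConfig _config;
  std::string _name;
  uint32_t target_core = 0;               // which core's configuration this op uses
};

class FakeGemm : public FakeOperation {   // stand-in for a GemmWS-style operation
 public:
  using FakeOperation::FakeOperation;
  // Roofline-style estimate: compute time on the mapped core vs. DRAM time, in ms.
  double theoretical_time_ms(double gflops, double gbytes) const {
    double compute_ms = gflops / _config.max_systolic_flops(target_core) * 1e3;
    double memory_ms = gbytes / _config.max_dram_bandwidth() * 1e3;
    return std::max(compute_ms, memory_ms);
  }
};

int main() {
  FakeConfig cfg{{256.0, 512.0}, 100.0};  // two cores with different peak GFLOP/s
  FakeGemm ffn1(cfg, "ffn1", /*target_core=*/1);
  FakeGemm head(cfg, "lm_head");          // omitted argument falls back to core 0
  return ffn1.theoretical_time_ms(32.0, 0.5) < head.theoretical_time_ms(32.0, 0.5) ? 0 : 1;
}

The default argument is the design choice that keeps the patch small: only paths that already know their core assignment (such as LanguageModel passing _target_core) need to supply the new argument, while everything else silently keeps the old single-core behaviour.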