
Commit 8e78ee6

Merge pull request #16157 from NHZlX/anakin_subgraph_refine
refine anakin subgraph.
2 parents fbfedc8 + 0cc465a commit 8e78ee6

27 files changed, +401 -97 lines changed

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 14 additions & 2 deletions
@@ -1437,6 +1437,16 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
                            ->assert_is_op_output("box_coder")
                            ->AsIntermediate();
 
+  auto transpose_before_nms =
+      pattern->NewNode(GetNodeName("transpose_before_nms"))
+          ->assert_is_op("transpose2");
+
+  auto transpose_before_nms_out =
+      pattern->NewNode(GetNodeName("transpose_before_nms_out"))
+          ->assert_is_op_output("transpose2")
+          ->assert_is_op_input("multiclass_nms", "Scores")
+          ->AsIntermediate();
+
   auto multiclass_nms_op = pattern->NewNode(GetNodeName("multiclass_nms"))
                                ->assert_is_op("multiclass_nms")
                                ->assert_op_has_n_inputs("multiclass_nms", 2);
@@ -1487,8 +1497,10 @@ PDNode *patterns::AnakinDetectionPattern::operator()(
       {concat_out1, concat_out2, conv_in[kBoxCoderThirdInputOffset]});
   box_coder_out->LinksFrom({box_coder_op});
 
-  multiclass_nms_op
-      ->LinksFrom({box_coder_out, conv_in[kMultiClassSecondInputNmsOffset]})
+  transpose_before_nms->LinksFrom({conv_in[kMultiClassSecondInputNmsOffset]});
+  transpose_before_nms_out->LinksFrom({transpose_before_nms});
+
+  multiclass_nms_op->LinksFrom({box_coder_out, transpose_before_nms_out})
       .LinksTo({multiclass_nms_out});
 
   return multiclass_nms_out;
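
Note: with this change the pattern only matches detection subgraphs whose multiclass_nms score input arrives through a transpose2 op. A rough sketch of the matched chain (node names are the ones registered above; the arrows are illustrative only, not code):

  conv_in[kMultiClassSecondInputNmsOffset] -> transpose_before_nms ("transpose2") -> transpose_before_nms_out -> multiclass_nms ("Scores" input)
  box_coder_op -> box_coder_out -> multiclass_nms (box input)
  multiclass_nms -> multiclass_nms_out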

paddle/fluid/framework/ir/simplify_anakin_detection_pattern_pass.cc

Lines changed: 10 additions & 3 deletions
@@ -45,7 +45,7 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
 
     input_nodes.push_back(gpd.mutable_pattern()
                               ->NewNode("x" + std::to_string(times + 1))
-                              ->assert_is_op_input("multiclass_nms", "Scores")
+                              ->assert_is_op_input("transpose2")
                               ->AsInput());
 
   patterns::AnakinDetectionPattern pattern(gpd.mutable_pattern(), pattern_name);
@@ -106,6 +106,11 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
     Node *box_coder_out = subgraph.at(pattern.GetPDNode("box_coder_out"));
 
     Node *multiclass_nms_second_input = subgraph.at(input_nodes[times + 1]);
+    Node *transpose_before_nms =
+        subgraph.at(pattern.GetPDNode("transpose_before_nms"));
+    Node *transpose_before_nms_out =
+        subgraph.at(pattern.GetPDNode("transpose_before_nms_out"));
+
     Node *multiclass_nms = subgraph.at(pattern.GetPDNode("multiclass_nms"));
     Node *multiclass_nms_out =
         subgraph.at(pattern.GetPDNode("multiclass_nms_out"));
@@ -133,11 +138,11 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
                nodes[i * kNumFields + kPriorBoxLocOffset]->Name());
     }
 
-    int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
+    // int axis = boost::get<int>(concat_op1->Op()->GetAttr("axis"));
     framework::OpDesc concat1_desc;
     concat1_desc.SetType("concat");
     concat1_desc.SetInput("X", concat1_input_names);
-    concat1_desc.SetAttr("axis", axis);
+    concat1_desc.SetAttr("axis", 2);
     concat1_desc.SetOutput("Out", {concat_out1->Name()});
 
     auto *new_add_concat_op = graph->CreateOpNode(&concat1_desc);
@@ -184,6 +189,8 @@ std::unique_ptr<ir::Graph> SimplifyAnakinDetectionPatternPass<times>::ApplyImpl(
     delete_nodes.insert(concat_out2);
     delete_nodes.insert(box_coder_op);
     delete_nodes.insert(box_coder_out);
+    delete_nodes.insert(transpose_before_nms);
+    delete_nodes.insert(transpose_before_nms_out);
     delete_nodes.insert(multiclass_nms);
 
     new_add_concat_op->outputs.push_back(concat_out1);

paddle/fluid/inference/anakin/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 cc_library(anakin_engine SRCS engine.cc)
-nv_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto)
+cc_library(anakin_op_teller SRCS op_teller.cc DEPS framework_proto)
 target_link_libraries(anakin_engine anakin anakin_saber_common)
 cc_test(test_anakin_engine SRCS test_anakin_engine.cc DEPS anakin_engine)
 add_subdirectory(convert)

paddle/fluid/inference/anakin/convert/batch_norm.cc

Lines changed: 2 additions & 0 deletions
@@ -43,11 +43,13 @@ void BatchNormOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output = op_desc.Output("Y").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Y").front();
   auto epsilon = boost::get<float>(op_desc.GetAttr("epsilon"));
+  // auto momentum = boost::get<float>(op_desc.GetAttr("momentum"));
 
   auto bn_op_name = op_name + ":bn";
   auto bn_output = bn_op_name + "_output";
   engine_->AddOp(bn_op_name, "BatchNorm", {inputs["X"]}, {bn_output});
   engine_->AddOpAttr(bn_op_name, "epsilon", epsilon);
+  engine_->AddOpAttr(bn_op_name, "momentum", static_cast<float>(1.0));
 
   auto scale_op_name = op_name + ":scale";
   auto get_lod_tensor = [this, &scope, &op_name](const std::string &var_name,

paddle/fluid/inference/anakin/convert/density_prior_box.cc

Lines changed: 21 additions & 10 deletions
@@ -27,8 +27,8 @@ namespace paddle {
 namespace inference {
 namespace anakin {
 
-void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc &op,
-                                            const framework::Scope &scope,
+void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc& op,
+                                            const framework::Scope& scope,
                                             bool test_mode) {
   framework::OpDesc op_desc(op, nullptr);
   auto input_name = op_desc.Input("Input").front();
@@ -42,34 +42,45 @@ void DensityPriorBoxOpConverter::operator()(const framework::proto::OpDesc &op,
   auto fixed_ratios =
       boost::get<std::vector<float>>(op_desc.GetAttr("fixed_ratios"));
   auto densities = boost::get<std::vector<int>>(op_desc.GetAttr("densities"));
+  std::vector<float> dens;
+  for (auto& ele : densities) {
+    dens.push_back(static_cast<float>(ele));
+  }
 
   // lack flip
-  auto clip = boost::get<bool>(op_desc.GetAttr("clip"));
+  // auto clip = boost::get<bool>(op_desc.GetAttr("clip"));
   auto variances = boost::get<std::vector<float>>(op_desc.GetAttr("variances"));
+  for (auto& ele : variances) {
+    LOG(INFO) << ele;
+  }
 
   // lack img_h, img_w
   auto step_h = boost::get<float>(op_desc.GetAttr("step_h"));
   auto step_w = boost::get<float>(op_desc.GetAttr("step_w"));
   auto offset = boost::get<float>(op_desc.GetAttr("offset"));
-  std::vector<std::string> order = {"MIN", "COM", "MAX"};
+  PTuple<std::string> t_order;
+  t_order.push_back("MIN");
+  t_order.push_back("COM");
+  t_order.push_back("MAX");
+
   std::vector<float> temp_v = {};
 
   engine_->AddOp(op_name, "PriorBox", {input_name, image_name}, {output_name});
   engine_->AddOpAttr<PTuple<float>>(op_name, "min_size", temp_v);
   engine_->AddOpAttr<PTuple<float>>(op_name, "max_size", temp_v);
   engine_->AddOpAttr<PTuple<float>>(op_name, "aspect_ratio", temp_v);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_sizes", fixed_sizes);
-  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratios", fixed_ratios);
-  engine_->AddOpAttr<PTuple<int>>(op_name, "density", densities);
-  engine_->AddOpAttr(op_name, "is_flip", false);
-  engine_->AddOpAttr(op_name, "is_clip", clip);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_size", fixed_sizes);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "fixed_ratio", fixed_ratios);
+  engine_->AddOpAttr<PTuple<float>>(op_name, "density", dens);
+  engine_->AddOpAttr(op_name, "is_flip", static_cast<bool>(false));
+  engine_->AddOpAttr(op_name, "is_clip", static_cast<bool>(false));
   engine_->AddOpAttr<PTuple<float>>(op_name, "variance", variances);
   engine_->AddOpAttr(op_name, "img_h", static_cast<int>(0));
   engine_->AddOpAttr(op_name, "img_w", static_cast<int>(0));
   engine_->AddOpAttr(op_name, "step_h", step_h);
   engine_->AddOpAttr(op_name, "step_w", step_w);
   engine_->AddOpAttr(op_name, "offset", offset);
-  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", order);
+  engine_->AddOpAttr<PTuple<std::string>>(op_name, "order", t_order);
 }
 
 } // namespace anakin

paddle/fluid/inference/anakin/convert/op_converter.h

Lines changed: 30 additions & 0 deletions
@@ -18,6 +18,7 @@
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
+#include <vector>
 #include "framework/core/types.h"
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_registry.h"
@@ -68,6 +69,35 @@ class AnakinOpConverter {
       ConvertOp(op, parameters, scope, engine);
     }
   }
+
+  // The scope here should be inited with the parameter vars.
+  void ConvertBlockToAnakinEngine(
+      framework::BlockDesc *block_desc, const framework::Scope &scope,
+      const std::vector<std::string> &inputs,
+      const std::unordered_set<std::string> &parameters,
+      const std::vector<std::string> &outputs, AnakinNvEngine *engine) {
+    framework::proto::BlockDesc *block_proto = block_desc->Proto();
+    ConvertBlock(*block_proto, parameters, scope, engine);
+    engine->Freeze();
+    for (auto &input : inputs) {
+      if (parameters.count(input)) continue;
+      auto *var = block_desc->FindVar(input);
+      PADDLE_ENFORCE(var, "no variable called %s", input);
+
+      auto var_shape = var->GetShape();
+      PADDLE_ENFORCE(var_shape.size() == 4);
+      std::vector<int> input_shape;
+      for (int i = 0; i < var_shape.size(); i++) {
+        input_shape.push_back(var_shape[i]);
+      }
+      input_shape[0] = 1;
+
+      engine->SetInputShape(input, input_shape);
+    }
+    engine->Optimize();
+    engine->InitGraph();
+  }
+
   void SetEngine(AnakinNvEngine *engine) { engine_ = engine; }
   virtual ~AnakinOpConverter() {}
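
Note: ConvertBlockToAnakinEngine is the new single-call entry point in this header: it converts every op in the block, freezes the Anakin net, registers a 4-D shape (with the batch dimension pinned to 1) for every non-parameter input, then optimizes and initializes the engine graph. A minimal usage sketch, assuming a converter instance, a scope already holding the parameter variables, a BlockDesc, and an AnakinNvEngine built elsewhere; the variable names below are placeholders, not code from this commit:

  // Sketch only: converter, block_desc, scope, param_names and engine are
  // assumed to be set up elsewhere; the scope must already hold the
  // parameter variables, as the comment in the header notes.
  std::vector<std::string> inputs = {"image"};              // non-parameter feeds
  std::vector<std::string> outputs = {"detection_output"};  // fetch targets
  converter->SetEngine(engine);
  converter->ConvertBlockToAnakinEngine(block_desc, scope, inputs, param_names,
                                        outputs, engine);
  // After the call the engine is frozen, shaped, optimized, and its graph
  // initialized, so it is ready to run the converted block.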

paddle/fluid/inference/anakin/convert/pool2d.cc

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ void Pool2dOpConverter::operator()(const framework::proto::OpDesc &op,
   if (pool_type == "max") {
     anakin_pool_type = "MAX";
   } else if (pool_type == "avg") {
-    anakin_pool_type = "AVG";
+    anakin_pool_type = "AVGEXC";
   } else {
     PADDLE_THROW("TensorRT unsupported pooling type!");
   }

paddle/fluid/inference/anakin/convert/softmax.cc

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ void SoftMaxOpConverter::operator()(const framework::proto::OpDesc &op,
   auto output = op_desc.Output("Out").front();
   auto op_name = op_desc.Type() + ":" + op_desc.Output("Out").front();
   engine_->AddOp(op_name, "Softmax", {input}, {output});
-  engine_->AddOpAttr(op_name, "axis", 1);
+  engine_->AddOpAttr(op_name, "axis", 2);
 }
 
 } // namespace anakin

paddle/fluid/inference/anakin/convert/test_batch_norm_op.cc

Lines changed: 2 additions & 1 deletion
@@ -52,8 +52,9 @@ TEST(batch_norm_op, test) {
   desc.SetOutput("SavedVariance", {"batch_norm_save_variance"});
 
   float eps = 1e-5f;
+  bool is_test = true;
   desc.SetAttr("epsilon", eps);
-  desc.SetAttr("is_test", true);
+  desc.SetAttr("is_test", is_test);
 
   validator.SetOp(*desc.Proto());

paddle/fluid/inference/anakin/convert/test_pool2d_op.cc

Lines changed: 41 additions & 0 deletions
@@ -64,11 +64,52 @@ void test_pool2d(bool global_pooling, bool ceil_mode,
   validator.Execute(1);
 }
 
+void test_pool2d2(bool global_pooling, bool ceil_mode,
+                  std::string pool_type = "max") {
+  auto* pool2d_converter =
+      Registry<AnakinOpConverter>::Global().Lookup("pool2d");
+  ASSERT_TRUE(pool2d_converter);
+
+  framework::Scope scope;
+  std::unordered_set<std::string> parameters;
+  AnakinConvertValidation validator(parameters, scope);
+
+  // The ITensor's Dims should not contain the batch size.
+  // So, the ITensor's Dims of input and output should be C * H * W.
+  validator.DeclInputVar("pool2d_x", {1, 1, 17, 17});
+  validator.DeclOutputVar("pool2d_out", {1, 1, 17, 17});
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("pool2d");
+  desc.SetInput("X", {"pool2d_x"});
+  desc.SetOutput("Out", {"pool2d_out"});
+
+  std::vector<int> ksize({3, 3});
+  std::vector<int> strides({1, 1});
+  std::vector<int> paddings({1, 1});
+  std::string pooling_t = pool_type;
+
+  desc.SetAttr("pooling_type", pooling_t);
+  desc.SetAttr("ksize", ksize);
+  desc.SetAttr("strides", strides);
+  desc.SetAttr("paddings", paddings);
+  desc.SetAttr("global_pooling", global_pooling);
+  desc.SetAttr("ceil_mode", true);
+
+  LOG(INFO) << "set OP";
+  validator.SetOp(*desc.Proto());
+  LOG(INFO) << "execute";
+
+  validator.Execute(1);
+}
+
 TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); }
 TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); }
 
 TEST(Pool2dOpConverter, max_ceil_test) { test_pool2d(false, true); }
 TEST(Pool2dOpConverter, avg_ceil_test) { test_pool2d(false, true, "avg"); }
+TEST(Pool2dOpConverter, avg_ceil_test2) { test_pool2d2(false, true, "avg"); }
 
 } // namespace anakin
 } // namespace inference
