
Commit 68fdaf8

implement layout autotuning for amp training
1 parent e97046e commit 68fdaf8

7 files changed: +252 / -224 lines changed

paddle/fluid/imperative/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
@@ -7,8 +7,13 @@ cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator
 ENDIF()
 cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api)
 add_subdirectory(jit)
+if (WITH_GPU)
+cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info phi_gpu_info)
+else()
+cc_library(layout_autotune SRCS layout_autotune.cc DEPS op_info)
+endif()
 cc_library(amp SRCS amp_auto_cast.cc DEPS layer var_helper)
-cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper)
+cc_library(tracer SRCS tracer.cc DEPS layer engine program_desc_tracer amp denormal garbage_collector var_helper layout_autotune)
 cc_library(basic_engine SRCS basic_engine.cc DEPS layer gradient_accumulator)
 cc_library(engine SRCS basic_engine.cc partial_grad_engine.cc DEPS layer gradient_accumulator)
 cc_library(imperative_profiler SRCS profiler.cc DEPS flags)
paddle/fluid/imperative/layout_autotune.cc

Lines changed: 178 additions & 0 deletions
@@ -0,0 +1,178 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/imperative/layout_autotune.h"
+#include "paddle/fluid/framework/op_info.h"
+#include "paddle/fluid/imperative/layout_transformer.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
+
+namespace paddle {
+namespace imperative {
+
+bool LayoutAutoTune::UseLayoutAutoTune() const {
+#if defined(PADDLE_WITH_CUDA)
+  if (!phi::backends::gpu::TensorCoreAvailable()) {
+    LOG(INFO) << "Layout AutoTuning is not available.";
+    return false;
+  } else {
+    return use_layout_autotune_;
+  }
+#else
+  return false;
+#endif
+}
+
+LayoutAutoTune::LayoutAutoTune() {
+  const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
+  for (auto it = op_info.begin(); it != op_info.end(); it++) {
+    // only record forward operators
+    if (it->first.find("_grad") != std::string::npos) {
+      continue;
+    }
+
+    // some normalization operators such as instance_norm and layer_norm
+    // do not have a data_format attr, but are layout sensitive.
+    if (it->first.find("norm") != std::string::npos) {
+      layout_agnostic_ops_.emplace(it->first);
+      continue;
+    }
+
+    auto* attr_checker = it->second.Checker();
+    if (attr_checker) {
+      auto attrs = attr_checker->GetDefaultAttrMap();
+      if (attrs.find("data_format") != attrs.end() ||
+          attrs.find("data_layout") != attrs.end()) {
+        VLOG(4) << "Heavily layout sensitive OP: " << it->first;
+        heavily_layout_sensitive_ops_.emplace(it->first);
+        continue;
+      }
+
+      // Attribute names are fuzzy matched, such as start and start_axis.
+      bool layout_agnostic = true;
+      for (auto& attr : attrs) {
+        auto attr_name = attr.first;
+        VLOG(6) << "OP: " << it->first << " Attr Name: " << attr_name;
+        if (attr_name.find("axis") != std::string::npos ||
+            attr_name.find("axes") != std::string::npos ||
+            attr_name.find("dim") != std::string::npos ||
+            attr_name.find("start") != std::string::npos ||
+            attr_name.find("end") != std::string::npos) {
+          VLOG(4) << "Lightly layout sensitive OP: " << it->first;
+          layout_agnostic = false;
+          lightly_layout_sensitive_ops_.emplace(it->first);
+          break;
+        }
+      }
+
+      if (layout_agnostic) {
+        VLOG(4) << "Layout agnostic_ops: " << it->first;
+        layout_agnostic_ops_.emplace(it->first);
+      }
+    }
+  }
+
+  VLOG(3) << "The number of layout agnostic OPs: "
+          << layout_agnostic_ops_.size() << ", heavily layout sensitive OPs: "
+          << heavily_layout_sensitive_ops_.size()
+          << ", lightly layout sensitive OPs: "
+          << lightly_layout_sensitive_ops_.size();
+}
+
+template <typename VarType>
+paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
+    const std::string& op_type,
+    const paddle::imperative::NameVarMap<VarType>& ins,
+    const paddle::imperative::NameVarMap<VarType>& outs,
+    paddle::framework::AttributeMap* attrs,
+    const std::shared_ptr<imperative::Tracer>& tracer) {
+  if (!LayoutAutoTune::Instance().UseLayoutAutoTune()) {
+    return ins;
+  }
+
+  // When layout autotuning is enabled, the tuner checks the desired layout.
+  // (1) If the desired layout is undefined and there are no convolutional
+  // layers, layout optimization is unnecessary. Otherwise, the desired layout
+  // will be set to the best layout only when there is a convolutional layer
+  // with NCHW layout and a Tensor Core is available.
+  // (2) If the desired layout is defined, run the transposer.
+
+  if (LayoutAutoTune::Instance().GetDesiredLayout() == DataLayout::UNDEFINED) {
+    // Layout autotune only supports models with convolutional layers.
+    if (op_type != "conv2d") {
+      return ins;
+    } else {
+      if (BOOST_GET_CONST(std::string, (*attrs)["data_format"]) == "NCHW") {
+        LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC);
+        VLOG(3) << "Tune the layout from "
+                << BOOST_GET_CONST(std::string, (*attrs)["data_format"])
+                << " to " << paddle::framework::DataLayoutToString(
+                       LayoutAutoTune::Instance().GetDesiredLayout());
+      } else {
+        LayoutAutoTune::Instance().DisableLayoutAutoTune();
+        return ins;
+      }
+    }
+  }
+
+  std::shared_ptr<LayoutTransformer<VarType>> transposer = nullptr;
+  if (op_type == "conv2d") {
+    transposer =
+        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
+    transposer->SetArguments({"Input"}, {"Output"}, {"data_format"});
+  } else if (op_type == "batch_norm") {
+    transposer =
+        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
+    transposer->SetArguments({"X"}, {"Y"}, {"data_layout"});
+  } else if (op_type == "pool2d") {
+    transposer =
+        std::make_shared<HeavilyLayoutSensitiveOpTransformer<VarType>>(op_type);
+    transposer->SetArguments({"X"}, {"Out"}, {"data_format"});
+  } else if (op_type == "transpose2") {
+    transposer = std::make_shared<TransposeOpTransformer<VarType>>(op_type);
+  } else if (op_type == "flatten_contiguous_range") {
+    transposer = std::make_shared<FlattenOpTransformer<VarType>>(op_type);
+  } else if (op_type.find("elementwise_") != std::string::npos) {
+    transposer = std::make_shared<ElementwiseOpTransformer<VarType>>(op_type);
+  } else if (LayoutAutoTune::Instance().IsLayoutAgnostic(op_type)) {
+    transposer = std::make_shared<LayoutTransformer<VarType>>(op_type);
+  } else if (LayoutAutoTune::Instance().IsLightlyLayoutSensitive(op_type)) {
+    transposer =
+        std::make_shared<LightlyLayoutSensitiveOpTransformer<VarType>>(op_type);
+  } else {
+    PADDLE_ENFORCE_NOT_NULL(
+        transposer, phi::errors::Unimplemented(
+                        "%s 's LayoutTransformer is unimplemented.", op_type));
+  }
+
+  return transposer->Apply(ins, outs, attrs, tracer);
+}
+template paddle::imperative::NameVarMap<VarBase> AutoTuneLayout<VarBase>(
+    const std::string& op_type,
+    const paddle::imperative::NameVarMap<VarBase>& ins,
+    const paddle::imperative::NameVarMap<VarBase>& outs,
+    paddle::framework::AttributeMap* attrs,
+    const std::shared_ptr<imperative::Tracer>& tracer);
+template paddle::imperative::NameVarMap<egr::EagerVariable>
+AutoTuneLayout<egr::EagerVariable>(
+    const std::string& op_type,
+    const paddle::imperative::NameVarMap<egr::EagerVariable>& ins,
+    const paddle::imperative::NameVarMap<egr::EagerVariable>& outs,
+    paddle::framework::AttributeMap* attrs,
+    const std::shared_ptr<imperative::Tracer>& tracer);
+
+}  // namespace imperative
+}  // namespace paddle
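
Since the tracer target now links against layout_autotune, the intended consumer of AutoTuneLayout is the imperative tracer's op-dispatch path. The following is a minimal sketch, not part of this diff, of how a caller could route an op's inputs through the tuner before execution; the wrapper name PrepareLayoutTunedInputs is hypothetical, and only the AutoTuneLayout signature is taken from the code above.

// Illustrative sketch only (not in this commit): feed an op's inputs through
// the layout tuner before the op runs. The wrapper name is hypothetical.
#include "paddle/fluid/imperative/layout_autotune.h"

namespace paddle {
namespace imperative {

template <typename VarType>
NameVarMap<VarType> PrepareLayoutTunedInputs(
    const std::string& op_type,
    const NameVarMap<VarType>& ins,
    const NameVarMap<VarType>& outs,
    framework::AttributeMap* attrs,
    const std::shared_ptr<Tracer>& tracer) {
  // AutoTuneLayout returns transposed inputs (and rewrites layout attributes
  // such as data_format) when autotuning is active; otherwise it hands the
  // original inputs back unchanged.
  return AutoTuneLayout<VarType>(op_type, ins, outs, attrs, tracer);
}

}  // namespace imperative
}  // namespace paddle

In the real call site the returned map would replace ins before the op is prepared and executed; with autotuning disabled the call is a pass-through.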

paddle/fluid/imperative/layout_autotune.h

Lines changed: 19 additions & 79 deletions
@@ -15,17 +15,14 @@
 #pragma once
 #include <glog/logging.h>
 #include <memory>
-#include <set>
-#include "paddle/fluid/framework/op_info.h"
-#include "paddle/phi/backends/gpu/gpu_info.h"
+#include <unordered_set>
 #include "paddle/phi/common/layout.h"
-#include "paddle/phi/common/place.h"
 #include "paddle/phi/core/compat/type_defs.h"

-namespace phi {
-namespace autotune {
+namespace paddle {
+namespace imperative {

-class DenseTensor;
+class Tracer;

 using DataLayout = paddle::experimental::DataLayout;

@@ -36,91 +33,26 @@ class LayoutAutoTune {
     return layout_autoTune;
   }

-  bool UseLayoutAutoTune() {
-#if defined(PADDLE_WITH_CUDA)
-    if (!phi::backends::gpu::TensorCoreAvailable()) {
-      LOG(INFO) << "Layout AutoTuning is not available.";
-      return false;
-    } else {
-      return use_layout_autotune_;
-    }
-#else
-    return false;
-#endif
-  }
+  bool UseLayoutAutoTune() const;

   void EnableLayoutAutoTune() { use_layout_autotune_ = true; }

   void DisableLayoutAutoTune() { use_layout_autotune_ = false; }

-  bool IsLightlyLayoutSensitive(const std::string& op_type) {
+  bool IsLightlyLayoutSensitive(const std::string& op_type) const {
     return lightly_layout_sensitive_ops_.count(op_type) != 0;
   }

-  bool IsLayoutAgnostic(const std::string& op_type) {
+  bool IsLayoutAgnostic(const std::string& op_type) const {
     return layout_agnostic_ops_.count(op_type) != 0;
   }

-  DataLayout GetDesiredLayout() { return layout_; }
+  DataLayout GetDesiredLayout() const { return layout_; }

   void SetDesiredLayout(const DataLayout& layout) { layout_ = layout; }

  private:
-  LayoutAutoTune() {
-    const auto& op_info = paddle::framework::OpInfoMap::Instance().map();
-    for (auto it = op_info.begin(); it != op_info.end(); it++) {
-      // only record forwrd operators
-      if (it->first.find("_grad") != std::string::npos) {
-        continue;
-      }
-
-      // some normalization operators such as instance_norm and layer_norm
-      // do not have data_format attr, but are layout sensitive.
-      if (it->first.find("norm") != std::string::npos) {
-        layout_agnostic_ops_.emplace(it->first);
-        continue;
-      }
-
-      auto* attr_checker = it->second.Checker();
-      if (attr_checker) {
-        auto attrs = attr_checker->GetDefaultAttrMap();
-        if (attrs.find("data_format") != attrs.end() ||
-            attrs.find("data_layout") != attrs.end()) {
-          VLOG(4) << "Heavily layout sensitive OP: " << it->first;
-          heavily_layout_sensitive_ops_.emplace(it->first);
-          continue;
-        }
-
-        // Attribute name is fuzzy matched, such as start and start_axis.
-        bool layout_agnostic = true;
-        for (auto& attr : attrs) {
-          auto attr_name = attr.first;
-          VLOG(6) << "OP: " << it->first << " Attr Name: " << attr_name;
-          if (attr_name.find("axis") != std::string::npos ||
-              attr_name.find("axes") != std::string::npos ||
-              attr_name.find("dim") != std::string::npos ||
-              attr_name.find("start") != std::string::npos ||
-              attr_name.find("end") != std::string::npos) {
-            VLOG(4) << "Lightly layout sensitive OP: " << it->first;
-            layout_agnostic = false;
-            lightly_layout_sensitive_ops_.emplace(it->first);
-            break;
-          }
-        }
-
-        if (layout_agnostic) {
-          VLOG(4) << "Layout agnostic_ops: " << it->first;
-          layout_agnostic_ops_.emplace(it->first);
-        }
-      }
-    }
-
-    VLOG(3) << "The number of layout agnostic OPs: "
-            << layout_agnostic_ops_.size() << ", heavily layout sensitive OPs: "
-            << heavily_layout_sensitive_ops_.size()
-            << ", lightly layout sensitive OPs: "
-            << lightly_layout_sensitive_ops_.size();
-  }
+  LayoutAutoTune();

   bool use_layout_autotune_{false};

@@ -133,5 +65,13 @@ class LayoutAutoTune {
   DataLayout layout_{DataLayout::UNDEFINED};
 };

-}  // namespace autotune
-}  // namespace phi
+template <typename VarType>
+paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
+    const std::string& op_type,
+    const paddle::imperative::NameVarMap<VarType>& ins,
+    const paddle::imperative::NameVarMap<VarType>& outs,
+    paddle::framework::AttributeMap* attrs,
+    const std::shared_ptr<imperative::Tracer>& tracer);
+
+}  // namespace imperative
+}  // namespace paddle
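
The header now exposes LayoutAutoTune as a singleton plus the templated AutoTuneLayout entry point declared above. Below is a minimal usage sketch of the singleton side, again not part of the diff and with a made-up helper name, using only members declared in this header.

#include <glog/logging.h>

#include "paddle/fluid/imperative/layout_autotune.h"

// Hypothetical helper: drive the layout-autotune switches from imperative
// mode code. Every member used here is declared in the header above.
void SetLayoutAutoTune(bool enable) {
  auto& tuner = paddle::imperative::LayoutAutoTune::Instance();
  if (enable) {
    tuner.EnableLayoutAutoTune();   // opt in for subsequently traced ops
  } else {
    tuner.DisableLayoutAutoTune();  // keep the model's original layouts
  }
  // On CUDA builds, UseLayoutAutoTune() additionally requires a Tensor Core
  // capable GPU; on other builds it always reports false.
  LOG(INFO) << "layout autotune active: " << tuner.UseLayoutAutoTune()
            << ", desired layout: "
            << static_cast<int>(tuner.GetDesiredLayout());
}

AutoTuneLayout then consults this state on every traced op, as shown in layout_autotune.cc above.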
