Commit 1c0f1f7

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into trt-IPluginV2Ext

2 parents: 80cef1e + 646eb4f

File tree: 17 files changed, 434 additions and 73 deletions

paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.cc

Lines changed: 40 additions & 0 deletions
@@ -24,6 +24,46 @@ namespace paddle {
 namespace framework {
 namespace ir {
 
+AdaptivePool2dConvertGlobalPass::AdaptivePool2dConvertGlobalPass() {
+  AddOpCompat(OpCompat("pool2d"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("pooling_type")
+      .IsStringIn({"max", "avg"})
+      .End()
+      .AddAttr("ksize")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("global_pooling")
+      .IsBoolEQ(true)
+      .End()
+      .AddAttr("strides")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("paddings")
+      .IsType<std::vector<int>>()
+      .End()
+      .AddAttr("exclusive")
+      .IsType<bool>()
+      .End()
+      .AddAttr("adaptive")
+      .IsBoolEQ(false)
+      .End()
+      .AddAttr("ceil_mode")
+      .IsType<bool>()
+      .End()
+      .AddAttr("data_format")
+      .IsStringIn({"NHWC", "NCHW"})
+      .End()
+      .AddAttr("padding_algorithm")
+      .IsStringIn({"EXPLICIT", "SAME", "VALID"})
+      .End();
+}
+
 void AdaptivePool2dConvertGlobalPass::ApplyImpl(ir::Graph* graph) const {
   std::string name_scope = "adaptive_pool2d_convert_global_pass";
   FusePassBase::Init(name_scope, graph);
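The rationale for the pass: adaptive average pooling to a 1x1 output computes exactly a global average pool, so the op can be rewritten whenever the attributes above match. A minimal sketch of that equivalence in Python, assuming a recent paddle build:

    import paddle

    # adaptive pooling to a 1x1 target averages over the whole spatial extent,
    # which is precisely what global pooling does
    x = paddle.rand([2, 3, 8, 8])
    adaptive_out = paddle.nn.functional.adaptive_avg_pool2d(x, output_size=1)
    global_out = paddle.nn.functional.avg_pool2d(x, kernel_size=x.shape[2:])
    assert paddle.allclose(adaptive_out, global_out).item()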

paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.h

Lines changed: 1 addition & 0 deletions
@@ -31,6 +31,7 @@ class Graph;
 */
 class AdaptivePool2dConvertGlobalPass : public FusePassBase {
  public:
+  AdaptivePool2dConvertGlobalPass();
   virtual ~AdaptivePool2dConvertGlobalPass() {}
 
  protected:

paddle/fluid/framework/ir/map_matmul_to_mul_pass.cc

Lines changed: 70 additions & 0 deletions
@@ -267,6 +267,68 @@ void Squeeze2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   AddStatis(found_count);
 }
 
+Reshape2MatmulFusePass::Reshape2MatmulFusePass() {
+  AddOpCompat(OpCompat("reshape2"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Shape")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddInput("ShapeTensor")
+      .IsTensor()
+      .IsOptional()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddOutput("XShape")
+      .IsTensor()
+      .End()
+      .AddAttr("shape")  // ints
+      .IsType<std::vector<int>>()
+      .End();
+
+  AddOpCompat(OpCompat("matmul"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("alpha")
+      .IsNumGT(0.99999f)
+      .IsNumLT(1.00001f)
+      .End()
+      .AddAttr("transpose_X")
+      .IsBoolEQ(false)
+      .End()
+      .AddAttr("transpose_Y")
+      .IsBoolEQ(false)
+      .End();
+
+  AddOpCompat(OpCompat("mul"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("x_num_col_dims")
+      .IsNumEQ(1)
+      .End()
+      .AddAttr("y_num_col_dims")
+      .IsNumEQ(1)
+      .End();
+}
+
 void Reshape2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   PADDLE_ENFORCE_NOT_NULL(
       graph, platform::errors::InvalidArgument("Graph cannot be nullptr."));
@@ -280,6 +342,10 @@ void Reshape2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
   int found_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
+    if (!IsCompat(subgraph, g)) {
+      LOG(WARNING) << "Pass in op compat failed.";
+      return;
+    }
     VLOG(4) << "fuse reshape2+matmul to mul";
     GET_IR_NODE_FROM_SUBGRAPH(reshape2_in_x, reshape2_in_x, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(reshape2_op, reshape2_op, fuse_pattern);
@@ -326,6 +392,10 @@ void Reshape2MatmulFusePass::ApplyImpl(ir::Graph* graph) const {
       desc.SetAttr("X_scale", matmul_op->Op()->GetAttr("X_scale"));
       desc.SetAttr("weight_scale", matmul_op->Op()->GetAttr("weight_scale"));
     }
+    if (!IsCompat(desc)) {
+      LOG(WARNING) << "reshape2 matmul pass in out mul op compat failed.";
+      return;
+    }
     auto mul_node = g->CreateOpNode(&desc);
     IR_NODE_LINK_TO(reshape2_in_x, mul_node);
     IR_NODE_LINK_TO(matmul_in_y, mul_node);
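The alpha and transpose constraints registered above delimit the only regime in which matmul reduces to the simpler mul op. A NumPy illustration of the identity the fusion relies on (illustrative only, not Paddle code):

    import numpy as np

    # reshape2 collapses x to 2-D; with alpha == 1 and both transpose flags
    # false, matmul on the result is the plain dense product that mul computes
    x = np.random.rand(4, 1, 1, 8)
    y = np.random.rand(8, 16)

    reshaped = x.reshape(4, 8)                 # reshape2 output
    out_matmul = 1.0 * np.matmul(reshaped, y)  # matmul with alpha = 1
    out_mul = reshaped.dot(y)                  # what the fused mul op computes
    assert np.allclose(out_matmul, out_mul)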

paddle/fluid/framework/ir/map_matmul_to_mul_pass.h

Lines changed: 1 addition & 0 deletions
@@ -96,6 +96,7 @@ class Squeeze2MatmulFusePass : public FusePassBase {
 
 class Reshape2MatmulFusePass : public FusePassBase {
  public:
+  Reshape2MatmulFusePass();
   virtual ~Reshape2MatmulFusePass() {}
 
  protected:

paddle/fluid/pybind/op_function.h

Lines changed: 16 additions & 8 deletions
@@ -209,11 +209,16 @@ inline bool PyObject_CheckLongOrToLong(PyObject** obj) {
       PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) { // NOLINT
     return true;
   }
-  auto to = PyNumber_Long(*obj);
-  if (to) {
-    *obj = to;
-    return true;
+
+  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name) // NOLINT
+          .find("numpy") != std::string::npos) {
+    auto to = PyNumber_Long(*obj);
+    if (to) {
+      *obj = to;
+      return true;
+    }
   }
+
   return false;
 }
 
@@ -223,10 +228,13 @@ inline bool PyObject_CheckFloatOrToFloat(PyObject** obj) {
       PyObject_IsInstance(*obj, (PyObject*)g_varbase_pytype)) { // NOLINT
     return true;
   }
-  auto to = PyNumber_Float(*obj);
-  if (to) {
-    *obj = to;
-    return true;
+  if (std::string(((PyTypeObject*)(*obj)->ob_type)->tp_name) // NOLINT
+          .find("numpy") != std::string::npos) {
+    auto to = PyNumber_Float(*obj);
+    if (to) {
+      *obj = to;
+      return true;
+    }
   }
   return false;
 }
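Both checks now convert only NumPy scalar objects, whose C-level type name ("numpy.int64", "numpy.float32", ...) contains "numpy"; arbitrary Python objects that merely support __int__ or __float__ are no longer coerced. A rough Python analogue of the new behavior (a sketch, not the actual pybind path):

    import numpy as np

    def check_long_or_to_long(obj):
        # already an integer: accept as-is
        if isinstance(obj, (bool, int)):
            return True, obj
        # approximate the C-level tp_name, e.g. "numpy.int64"
        tp_name = "%s.%s" % (type(obj).__module__, type(obj).__name__)
        if "numpy" in tp_name:
            try:
                return True, int(obj)  # stands in for PyNumber_Long
            except (TypeError, ValueError):
                pass
        return False, obj

    print(check_long_or_to_long(np.int64(7)))  # (True, 7): numpy scalars convert
    print(check_long_or_to_long(7.5))          # (False, 7.5): plain floats rejected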

paddle/scripts/paddle_build.sh

Lines changed: 18 additions & 1 deletion
@@ -1445,7 +1445,6 @@ function precise_card_test_single {
         mkdir ${PADDLE_ROOT}/build/ut_map/$case
     fi
     set -x
-    mkdir ${PADDLE_ROOT}/build/ut_map/$case
     find paddle/fluid -name '*.gcda'|xargs -I {} cp --path {} ut_map/$case
     find paddle/fluid -name '*.gcno'|xargs -I {} cp --path {} ut_map/$case
     python ${PADDLE_ROOT}/tools/get_single_test_cov.py ${PADDLE_ROOT} $case &
@@ -2142,6 +2141,23 @@ function reuse_so_cache() {
     fi
 }
 
+function find_temporary_files() {
+    set +x
+    jsonData=`curl \
+        -H "Authorization: token ${GITHUB_API_TOKEN}"\
+        -H "Accept: application/vnd.github.v3+json" \
+        https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/files`
+
+    result=`echo ${jsonData}|python ${PADDLE_ROOT}/tools/check_file_suffix.py`
+
+    if [ ${#result} -gt 0 ]
+    then
+        echo ${result}
+        exit 65
+    fi
+}
+
+
 function main() {
     local CMD=$1
     local parallel_number=$2
@@ -2154,6 +2170,7 @@ function main() {
     set +e
     check_style_info=$(check_style)
     check_style_code=$?
+    find_temporary_files
     generate_upstream_develop_api_spec ${PYTHON_ABI:-""} ${parallel_number}
     cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number}
     check_sequence_op_unittest
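find_temporary_files pipes the GitHub "PR files" JSON into tools/check_file_suffix.py and aborts the build (exit 65) as soon as the script prints anything. The script's own logic is not part of this diff; purely as a hypothetical sketch, such a filter could look like:

    import json
    import sys

    # hypothetical suffix blacklist, for illustration only
    DISALLOWED_SUFFIXES = ('.pyc', '.o', '.so', '.orig', '.swp')

    def main():
        # the GitHub files API returns a JSON array of {"filename": ...} objects
        changed_files = json.load(sys.stdin)
        offenders = [item['filename'] for item in changed_files
                     if item['filename'].endswith(DISALLOWED_SUFFIXES)]
        if offenders:
            # any output makes the calling shell function exit with 65
            print('temporary files found: ' + ' '.join(offenders))

    if __name__ == '__main__':
        main()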

python/paddle/distributed/collective.py

Lines changed: 81 additions & 10 deletions
@@ -1219,6 +1219,65 @@ def _parallel_embedding(x,
     return out
 
 
+def _parallel_embedding_npu(x,
+                            per_part_embeddings,
+                            origin_size,
+                            param_attr,
+                            inner_rank,
+                            num_partitions,
+                            name,
+                            group=None):
+    """
+    NPU Parallel Embedding
+    """
+    if group is not None and not group.is_member():
+        return
+    ring_id = 0 if group is None else group.id
+
+    origin_num_embeddings = origin_size[0]
+    embedding = paddle.nn.Embedding(
+        per_part_embeddings,
+        origin_size[1],
+        padding_idx=per_part_embeddings - 1,
+        sparse=False,
+        weight_attr=param_attr,
+        name=name)
+
+    origin_input_shape = x.shape
+    if len(origin_input_shape) == 2:
+        x = paddle.unsqueeze(x, axis=-1)
+    else:
+        assert origin_input_shape[-1] == 1, (
+            "The last dimension size of x must be 1.")
+    x_shard = paddle.shard_index(x, origin_num_embeddings, num_partitions,
+                                 inner_rank, per_part_embeddings - 1)
+    if len(origin_input_shape) == 2:
+        x_shard = paddle.squeeze(x_shard, axis=-1)
+    emb_out = embedding(x_shard)
+    startup_block = paddle.static.default_startup_program().global_block()
+    main_block = paddle.static.default_main_program().global_block()
+    startup_block.vars[embedding.weight.name].is_distributed = True
+    main_block.vars[embedding.weight.name].is_distributed = True
+    out = main_block.create_var(
+        shape=emb_out.shape,
+        dtype=emb_out.dtype,
+        type=emb_out.type,
+        lod_level=emb_out.lod_level,
+        persistable=False,
+        is_data=False,
+        need_check_feed=emb_out.desc.need_check_feed())
+    main_block.append_op(
+        type='c_allreduce_sum',
+        inputs={'X': emb_out},
+        outputs={'Out': out},
+        attrs={
+            'ring_id': ring_id,
+            'use_calc_stream': True,
+            'use_model_parallel': True
+        })
+    return out
+
+
 def split(x,
           size,
           operation,
@@ -1332,16 +1391,28 @@ def split(x,
             "but received vocabulary={} num_partitions={}".format(size[0], num_partitions)
 
         per_part_size = size[0] // num_partitions
-        emb_out = _parallel_embedding(
-            x,
-            per_part_size,
-            size,
-            weight_attr,
-            inner_rank,
-            num_partitions,
-            name,
-            group=None)
-        return emb_out
+        if core.is_compiled_with_npu():
+            emb_out = _parallel_embedding_npu(
+                x,
+                per_part_size,
+                size,
+                weight_attr,
+                inner_rank,
+                num_partitions,
+                name,
+                group=None)
+            return emb_out
+        else:
+            emb_out = _parallel_embedding(
+                x,
+                per_part_size,
+                size,
+                weight_attr,
+                inner_rank,
+                num_partitions,
+                name,
+                group=None)
+            return emb_out
     else:
         should_split = False
        if axis == 0:
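paddle.distributed.split is the public entry point for both branches. Something along these lines (static graph, two ranks launched through fleet; the shapes are illustrative) exercises the embedding path, which on NPU builds now routes to _parallel_embedding_npu:

    import paddle
    import paddle.distributed.fleet as fleet

    paddle.enable_static()
    fleet.init(is_collective=True)

    ids = paddle.static.data(name='ids', shape=[-1, 10], dtype='int64')
    # shard an 8 x 8 embedding table across 2 model-parallel ranks
    emb_out = paddle.distributed.split(
        ids, size=(8, 8), operation='embedding', num_partitions=2)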

python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py

Lines changed: 7 additions & 7 deletions
@@ -136,7 +136,7 @@ def dtype2ge(self, dtype):
 
     def dtype2np(self, index):
         assert index in self.dtype2np_map, "index[%d] is not supported %d" % (
-            dtype)
+            index)
         return self.dtype2np_map[index]
 
 
@@ -342,7 +342,7 @@ def _apply(self):
         y = self._get_ge_input(self.op.input_arg_names[1])
         pow = core.GEOperatorFactory.create_operator(
             "dotpow" + self._accumulated_op_id(),
-            "Pow").set_input("x1", x1).set_input("x2", y)
+            "Pow").set_input("x1", x).set_input("x2", y)
         return [pow], [[0]]
 
 
@@ -918,15 +918,15 @@ def _apply(self):
             scatter_value = core.GEOperatorFactory.create_operator(
                 "scatter" + self._accumulated_op_id(),
                 "TensorScatterAdd").set_input(
-                    "x", x_var).set_input("indices", index_var).set_input(
-                        "updates", updatesi_var)
+                    "x", x).set_input("indices", index).set_input("updates",
+                                                                  updates)
         else:
             scatter_value = core.GEOperatorFactory.create_operator(
                 "scatter" + self._accumulated_op_id(),
                 "TensorScatterUpdate").set_input(
-                    "x", x_var).set_input("indices", index_var).set_input(
-                        "updates", updates_var)
-        return [x_var, index_var, updates_var, scatter_value], [[-1]]
+                    "x", x).set_input("indices", index).set_input("updates",
+                                                                  updates)
+        return [x, index, updates, scatter_value], [[-1]]
 
 
 class CastParser(AscendParserBase):
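All three hunks repair the same defect: names such as dtype, x1, and x_var were never bound in their scopes, so the parsers raised NameError the moment those branches ran (note the assert's format string still has two %d placeholders for one argument, which this commit does not address). Python resolves names only at run time, which is why the bug survived import; a minimal illustration:

    def report(index):
        # compiles fine, but fails when executed
        return "index[%d] is not supported" % (dtype)  # 'dtype' was never defined

    try:
        report(0)
    except NameError as err:
        print(err)  # name 'dtype' is not defined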

python/paddle/fluid/tests/unittests/asp/test_asp_pruning_2d_best.py

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
 from __future__ import print_function
 
 import paddle
+import unittest
 from paddle.fluid.contrib import sparsity
 from paddle.fluid.tests.unittests.asp.asp_pruning_base import TestASPHelperPruningBase
 
python/paddle/fluid/tests/unittests/npu/test_collective_base_npu.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@
 import paddle.fluid as fluid
 import paddle.fluid.unique_name as nameGen
 from paddle.fluid import core
+from six import string_types
 
 
 class TestCollectiveRunnerBase(object):
