Skip to content

Commit

Permalink
[with_data_parallel][part13] remove with_data_parallel in example code (
Browse files Browse the repository at this point in the history
#51588)

* remove with_data_parallel in example code

* revert python/paddle/fluid/data_feeder.py

* fix static.nn.fc api
  • Loading branch information
kangguangli authored Mar 15, 2023
1 parent 5b3c7ee commit 14f1973
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 100 deletions.
41 changes: 10 additions & 31 deletions paddle/fluid/pybind/parallel_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -372,28 +372,21 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
Examples:
.. code-block:: python
import os
import paddle
import paddle.static as static
paddle.enable_static()
os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()
data = static.data(name="x", shape=[None, 1], dtype="float32")
hidden = static.nn.fc(input=data, size=10)
hidden = static.nn.fc(data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
build_strategy = static.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
program = static.CompiledProgram(static.default_main_program())
program = program.with_data_parallel(loss_name=loss.name,
build_strategy=build_strategy,
places=places)
program = static.CompiledProgram(static.default_main_program(), build_strategy=build_strategy)
)DOC");

py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
Expand Down Expand Up @@ -461,7 +454,6 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
.. code-block:: python
import numpy
import os
import paddle
import paddle.static as static
Expand All @@ -471,40 +463,27 @@ void BindParallelExecutor(pybind11::module &m) { // NOLINT
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = static.Executor(place)
# NOTE: If you use CPU to run the program, you need
# to specify the CPU_NUM, otherwise, paddle will use
# all the number of the logic core as the CPU_NUM,
# in that case, the batch size of the input should be
# greater than CPU_NUM, if not, the process will be
# failed by an exception.
if not use_cuda:
os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()
else:
places = static.cuda_places()
data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(input=data, size=10)
hidden = static.nn.fc(data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)
exe.run(static.default_startup_program())
build_strategy = static.BuildStrategy()
build_strategy.gradient_scale_strategy = \
static.BuildStrategy.GradientScaleStrategy.Customized
static.BuildStrategy.GradientScaleStrategy.Customized
compiled_prog = static.CompiledProgram(
static.default_main_program()).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy,
places=places)
static.default_main_program(),
build_strategy=build_strategy,
)
dev_count = len(places)
x = numpy.random.random(size=(10, 1)).astype('float32')
loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01
loss_grad = numpy.ones((1)).astype("float32") * 0.01
loss_grad_name = loss.name+"@GRAD"
loss_data = exe.run(compiled_prog,
feed={"X": x, loss_grad_name : loss_grad},
fetch_list=[loss.name, loss_grad_name])
feed={"X": x, loss_grad_name : loss_grad},
fetch_list=[loss.name, loss_grad_name])
)DOC")
.def_property(
"debug_graphviz_path",
Expand Down
12 changes: 0 additions & 12 deletions python/paddle/fluid/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,18 +82,6 @@ def _has_optimize_op(block):
return False


def _has_optimizer_in_control_flow(program):
if not program:
program = framework.default_main_program()
for op in program.global_block().ops:
if op.type == "conditional_block_grad":
sub_block = program.block(op._block_attr_id("sub_block"))
if _has_optimize_op(sub_block):
return True

return False


def _should_broadcast_or_not_exists(program, var_name):
block = program.global_block()
var = block.vars.get(var_name, None)
Expand Down
58 changes: 1 addition & 57 deletions python/paddle/fluid/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,21 +798,13 @@ def set_data_source(loader, places):
# Define network
loss = simple_net(image, label)
# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and the number of places must be the same with device number.
# - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
# - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)
exe = static.Executor(places[0])
exe.run(static.default_startup_program())
prog = static.CompiledProgram(static.default_main_program()).with_data_parallel(loss_name=loss.name)
prog = static.CompiledProgram(static.default_main_program())
if loader.iterable:
train_iterable(exe, prog, loss, loader)
else:
Expand Down Expand Up @@ -890,54 +882,6 @@ def forward(self, x):
print("Epoch {} batch {}: loss = {}".format(
epoch_id, batch_id, np.mean(loss.numpy())))
Examples 3:
.. code-block:: python
'''
Example of `drop_last` using in static graph multi-cards mode
'''
import paddle
import paddle.static as static
import numpy as np
import os
# We use 2 CPU cores to run inference network
os.environ['CPU_NUM'] = '2'
paddle.enable_static()
# The data source has only 3 batches, which can not be
# divided evenly to each CPU core
def batch_generator():
for i in range(3):
yield np.array([i+1]).astype('float32'),
x = static.data(name='x', shape=[None], dtype='float32')
y = x * x
def run_inference(drop_last):
loader = paddle.io.DataLoader.from_generator(feed_list=[x],
capacity=8, drop_last=drop_last)
loader.set_batch_generator(batch_generator, static.cpu_places())
exe = static.Executor(paddle.CPUPlace())
prog = static.CompiledProgram(static.default_main_program())
prog = prog.with_data_parallel()
result = []
for data in loader():
each_ret, = exe.run(prog, feed=data, fetch_list=[y])
result.extend(each_ret)
return result
# Set drop_last to True, so that the last batch whose
# number is less than CPU core number would be discarded.
print(run_inference(drop_last=True)) # [1.0, 4.0]
# Set drop_last to False, so that the last batch whose
# number is less than CPU core number can be tested.
print(run_inference(drop_last=False)) # [1.0, 4.0, 9.0]
"""
if _non_static_mode():
return DygraphGeneratorLoader(
Expand Down

0 comments on commit 14f1973

Please sign in to comment.