Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions paddle/fluid/operators/memcpy_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@ class MemcpyFunctor {
void operator()(const framework::LoDTensor &lod_tensor) const {
auto &out_tensor = *out_->GetMutable<framework::LoDTensor>();

if (dst_place_type_ == 3) {
if (dst_place_type_ == 2) {
framework::TensorCopy(lod_tensor, platform::CUDAPinnedPlace(), dev_ctx_,
&out_tensor);
} else if (dst_place_type_ == 2) {
} else if (dst_place_type_ == 1) {
framework::TensorCopy(lod_tensor, dev_ctx_.GetPlace(), dev_ctx_,
&out_tensor);
} else {
Expand Down
12 changes: 7 additions & 5 deletions python/paddle/fluid/optimizer.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4764,7 +4764,7 @@ def _append_fill_constant_ops(self, startup_program):
return

def _insert_async_memcpy_op(self, insert_idx, src_varname, dst_varname,
op_role, kind):
op_role, dst_place_type):
OP_ROLE_KEY = core.op_proto_and_checker_maker.kOpRoleAttrName()
self.block._insert_op_without_sync(
insert_idx,
Expand All @@ -4773,22 +4773,24 @@ def _insert_async_memcpy_op(self, insert_idx, src_varname, dst_varname,
outputs={
'Out': [self._main_program.global_block().var(dst_varname)]
},
attrs={"dst_place_type": int(kind),
OP_ROLE_KEY: op_role})
attrs={
"dst_place_type": int(dst_place_type),
OP_ROLE_KEY: op_role
})

def _insert_fetch_op(self, idx, varname):
assert varname in self.checkpoint_name2pinned_name, "Try to fetch {} from Pinned Memory, but it is NOT a checkpoint".format(
varname)

pinned_varname = self.checkpoint_name2pinned_name[varname]
fetch_varname = self.checkpoint_name2fetch_name[varname]
self._insert_async_memcpy_op(idx, pinned_varname, fetch_varname, 1, 2)
self._insert_async_memcpy_op(idx, pinned_varname, fetch_varname, 1, 1)

def _insert_offload_op(self, idx, varname):
assert varname in self.checkpoint_name2pinned_name, "Try to offload {} to Pinned Memory, but it is NOT a checkpoint".format(
varname)
pinned_varname = self.checkpoint_name2pinned_name[varname]
self._insert_async_memcpy_op(idx, varname, pinned_varname, 0, 3)
self._insert_async_memcpy_op(idx, varname, pinned_varname, 0, 2)

def _insert_sync_op(self, op_idx, checkpoint_name):
# single-stream offload does not need a sync
Expand Down
6 changes: 3 additions & 3 deletions python/paddle/fluid/tests/unittests/test_memcpy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_gpu_cpoy_to_pinned(self):
type='memcpy',
inputs={'X': gpu_var},
outputs={'Out': pinned_var},
attrs={'dst_place_type': 3})
attrs={'dst_place_type': 2})
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
gpu_, pinned_ = exe.run(main_program,
Expand All @@ -85,7 +85,7 @@ def test_pinned_cpoy_gpu(self):
type='memcpy',
inputs={'X': pinned_var},
outputs={'Out': gpu_var},
attrs={'dst_place_type': 2})
attrs={'dst_place_type': 1})
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
gpu_, pinned_ = exe.run(main_program,
Expand Down Expand Up @@ -135,7 +135,7 @@ def test_SELECTED_ROWS(self):
type='memcpy',
inputs={'X': selected_row_var},
outputs={'Out': pinned_var},
attrs={'dst_place_type': 3})
attrs={'dst_place_type': 2})
with self.assertRaises(NotImplementedError):
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
Expand Down