Skip to content

Commit 56ef9ab

Browse files
committed
Avoid bug on 'MAC python3.5/6'. (PaddlePaddle#30485)
* Avoid bug on 'MAC python3.5/6'. * Choose the saving method according to the OS. * smaller length of '_unpack_saved_dict' for MAC OS. * add version information of Python. * Edit comment.
1 parent 2967624 commit 56ef9ab

File tree

5 files changed

+58
-16
lines changed

5 files changed

+58
-16
lines changed

python/paddle/fluid/io.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import pickle
2323
import contextlib
2424
from functools import reduce
25+
import sys
2526

2627
import numpy as np
2728
import math
@@ -1715,7 +1716,7 @@ def _unpack_saved_dict(saved_obj):
17151716
unpack_infor = {}
17161717
for key, value in saved_obj.items():
17171718
if isinstance(value, np.ndarray):
1718-
MAX_NUMBER_OF_ELEMENT = 2**22
1719+
MAX_NUMBER_OF_ELEMENT = int((2**30 - 1) / value.dtype.itemsize)
17191720
num_element = np.prod(value.shape)
17201721
if num_element > MAX_NUMBER_OF_ELEMENT:
17211722
unpack_infor[key] = {}
@@ -1809,8 +1810,18 @@ def get_tensor(var):
18091810
parameter_list = list(filter(is_parameter, program.list_vars()))
18101811
param_dict = {p.name: get_tensor(p) for p in parameter_list}
18111812
param_dict = _unpack_saved_dict(param_dict)
1812-
with open(model_path + ".pdparams", 'wb') as f:
1813-
pickle.dump(param_dict, f, protocol=2)
1813+
1814+
# When a value of the dict is larger than 4GB, there is a bug on 'MAC python3.5/6'
1815+
if sys.platform == 'darwin' and sys.version_info.major == 3 and (
1816+
sys.version_info.minor == 5 or sys.version_info.minor == 6):
1817+
pickle_bytes = pickle.dumps(param_dict, protocol=2)
1818+
with open(model_path + ".pdparams", 'wb') as f:
1819+
max_bytes = 2**30
1820+
for i in range(0, len(pickle_bytes), max_bytes):
1821+
f.write(pickle_bytes[i:i + max_bytes])
1822+
else:
1823+
with open(model_path + ".pdparams", 'wb') as f:
1824+
pickle.dump(param_dict, f, protocol=2)
18141825

18151826
optimizer_var_list = list(
18161827
filter(is_belong_to_optimizer, program.list_vars()))

python/paddle/fluid/tests/unittests/test_paddle_save_load.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import unittest
1818
import numpy as np
19+
import os
1920
import paddle
2021
import paddle.nn as nn
2122
import paddle.optimizer as opt
@@ -90,13 +91,13 @@ def test_large_parameters_paddle_save(self):
9091
layer = LayerWithLargeParameters()
9192
save_dict = layer.state_dict()
9293

93-
path = "test_paddle_save_load_large_param_save/layer" + ".pdparams"
94+
path = os.path.join("test_paddle_save_load_large_param_save",
95+
"layer.pdparams")
9496
paddle.save(layer.state_dict(), path)
9597
dict_load = paddle.load(path)
9698
# compare results before and after saving
9799
for key, value in save_dict.items():
98-
self.assertTrue(
99-
np.sum(np.abs(dict_load[key] - value.numpy())) < 1e-15)
100+
self.assertTrue(np.array_equal(dict_load[key], value.numpy()))
100101

101102

102103
class TestSaveLoad(unittest.TestCase):

python/paddle/fluid/tests/unittests/test_static_save_load.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,7 +1210,7 @@ def test_large_parameters_static_save(self):
12101210
name="static_save_load_large_x",
12111211
shape=[None, 10],
12121212
dtype='float32')
1213-
z = paddle.static.nn.fc(x, LARGE_PARAM)
1213+
z = paddle.static.nn.fc(x, LARGE_PARAM, bias_attr=False)
12141214
place = paddle.CPUPlace()
12151215
exe = paddle.static.Executor(place)
12161216
exe.run(paddle.static.default_startup_program())
@@ -1220,16 +1220,36 @@ def test_large_parameters_static_save(self):
12201220
result_z = exe.run(program=prog,
12211221
feed={"static_save_load_large_x": inputs},
12221222
fetch_list=[z.name])
1223-
path = "test_static_save_load_large_param/static_save"
1223+
base_map = {}
1224+
for var in prog.list_vars():
1225+
if isinstance(var, framework.Parameter) or var.persistable:
1226+
t = np.array(fluid.global_scope().find_var(var.name)
1227+
.get_tensor())
1228+
# make sure all the parameter or optimizer vars have been updated
1229+
self.assertTrue(np.sum(np.abs(t)) != 0)
1230+
base_map[var.name] = t
1231+
1232+
path = os.path.join("test_static_save_load_large_param",
1233+
"static_save")
12241234
paddle.fluid.save(prog, path)
1235+
# set var to zero
1236+
for var in prog.list_vars():
1237+
if isinstance(var, framework.Parameter) or var.persistable:
1238+
ten = fluid.global_scope().find_var(var.name).get_tensor()
1239+
ten.set(np.zeros_like(np.array(ten)), place)
1240+
1241+
new_t = np.array(fluid.global_scope().find_var(var.name)
1242+
.get_tensor())
1243+
self.assertTrue(np.sum(np.abs(new_t)) == 0)
12251244

12261245
paddle.fluid.load(prog, path)
1227-
result_load = exe.run(program=prog,
1228-
feed={"static_save_load_large_x": inputs},
1229-
fetch_list=[z.name])
1230-
# compare results before and after saving
1231-
self.assertTrue(
1232-
np.sum(np.abs(result_z[0] - result_load[0])) < 1e-15)
1246+
1247+
for var in prog.list_vars():
1248+
if isinstance(var, framework.Parameter) or var.persistable:
1249+
new_t = np.array(fluid.global_scope().find_var(var.name)
1250+
.get_tensor())
1251+
base_t = base_map[var.name]
1252+
self.assertTrue(np.array_equal(new_t, base_t))
12331253

12341254

12351255
class TestProgramStateOldSaveSingleModel(unittest.TestCase):

python/paddle/framework/io.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import pickle
2020
import six
2121
import warnings
22+
import sys
2223

2324
import paddle
2425

@@ -262,8 +263,17 @@ def save(obj, path):
262263
saved_obj = _build_saved_state_dict(obj)
263264
saved_obj = _unpack_saved_dict(saved_obj)
264265

265-
with open(path, 'wb') as f:
266-
pickle.dump(saved_obj, f, protocol=2)
266+
# When a value of the dict is larger than 4GB, there is a bug on 'MAC python3.5/6'
267+
if sys.platform == 'darwin' and sys.version_info.major == 3 and (
268+
sys.version_info.minor == 5 or sys.version_info.minor == 6):
269+
pickle_bytes = pickle.dumps(saved_obj, protocol=2)
270+
with open(path, 'wb') as f:
271+
max_bytes = 2**30
272+
for i in range(0, len(pickle_bytes), max_bytes):
273+
f.write(pickle_bytes[i:i + max_bytes])
274+
else:
275+
with open(path, 'wb') as f:
276+
pickle.dump(saved_obj, f, protocol=2)
267277

268278

269279
def load(path, **configs):
19.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)