I'm trying a first, very simple test with dask/distributed: an aggregation over an HDF5 array/dataset via the distributed scheduler.
The array is created as follows:
import h5py
import numpy as np
with h5py.File('bla.h5', 'w') as f:
    f.create_dataset('mydata', data=np.random.random(size=(2**16,)), chunks=(10000,))
Next, the aggregation is run as follows:
import dask.array as da
import h5py
from dask.distributed import Client

c = Client('10.0.1.11:8786')  # connect to local scheduler
with h5py.File('bla.h5') as f:
    d = f['mydata']
    x = da.from_array(d, chunks=(10000,))
    print(x.min().compute())
The compute() call fails with a serialization error:
distributed.protocol.core - CRITICAL - Failed to Serialize
Traceback (most recent call last):
  File "..../distributed/protocol/core.py", line 53, in dumps
    for key, value in data.items()
  File "..../distributed/protocol/core.py", line 54, in <dictcomp>
    if type(value) is Serialize}
  File "..../distributed/protocol/serialize.py", line 157, in serialize
    raise TypeError(msg)
TypeError: Could not serialize object of type Dataset
distributed.comm.utils - INFO - Unserializable Message: [{'op': 'update-graph', 'tasks': ............. }]
distributed.comm.utils - ERROR - Could not serialize object of type Dataset
Traceback (most recent call last):
  File "..../distributed/comm/utils.py", line 40, in _to_frames
    context=context))
  File "..../distributed/protocol/core.py", line 53, in dumps
    for key, value in data.items()
  File "..../distributed/protocol/core.py", line 54, in <dictcomp>
    if type(value) is Serialize}
  File "..../distributed/protocol/serialize.py", line 157, in serialize
    raise TypeError(msg)
TypeError: Could not serialize object of type Dataset
distributed.batched - ERROR - Error in batched write
Traceback (most recent call last):
  File "..../distributed/batched.py", line 94, in _background_send
    on_error='raise')
  File "..../tornado/gen.py", line 1133, in run
    value = future.result()
  File "..../tornado/gen.py", line 1141, in run
    yielded = self.gen.throw(*exc_info)
  File "..../distributed/comm/tcp.py", line 221, in write
    'recipient': self._peer_addr})
  File "..../tornado/gen.py", line 1133, in run
    value = future.result()
  File "..../tornado/gen.py", line 1141, in run
    yielded = self.gen.throw(*exc_info)
  File "..../distributed/comm/utils.py", line 47, in to_frames
    res = yield offload(_to_frames)
  File "..../tornado/gen.py", line 1133, in run
    value = future.result()
  File "/usr/lib/python3.6/concurrent/futures/_base.py", line 425, in result
    return self.__get_result()
  File "/usr/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/usr/lib/python3.6/concurrent/futures/thread.py", line 56, in run
    result = self.fn(*self.args, **self.kwargs)
  File "..../distributed/comm/utils.py", line 40, in _to_frames
    context=context))
  File "..../distributed/protocol/core.py", line 53, in dumps
    for key, value in data.items()
  File "..../distributed/protocol/core.py", line 54, in <dictcomp>
    if type(value) is Serialize}
  File "..../distributed/protocol/serialize.py", line 157, in serialize
    raise TypeError(msg)
TypeError: Could not serialize object of type Dataset
I suspect that something in the compute graph being sent to the scheduler and workers (some numpy component? the h5py Dataset itself?) is not serializable. Is that possible?
Incidentally, the same happens when the HDF5 dataset is created via da.store:
import dask.array as da
import h5py
import numpy as np
with h5py.File('bla.h5', 'w') as f:
    x = da.from_array(np.random.random(size=(2**16,)), chunks=(10000,))
    dset = f.create_dataset('mydata', shape=x.shape, chunks=(10000,), dtype='f8')
    da.store(x, dset)
It works fine, however, when using an in-process client:
c = Client(processes=False)
This is on Ubuntu 18, Python 3.6. c.get_versions(check=True) returns:
{'scheduler': {'host': (('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')),
'optional': (('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None))}},
'workers': {'tcp://10.0.1.11:33675': {'host': (('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')),
'optional': (('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None))}},
'tcp://10.0.1.11:37443': {'host': (('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')),
'optional': (('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None))}},
'tcp://10.0.1.11:38463': {'host': (('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')),
'optional': (('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None))}},
'tcp://10.0.1.11:43929': {'host': (('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')),
'optional': (('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None))}}},
'client': {'host': [('python', '3.6.6.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.15.0-36-generic'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')],
'packages': {'required': [('dask', '0.19.4'),
('distributed', '1.23.3'),
('msgpack', '0.5.6'),
('cloudpickle', '0.6.1'),
('tornado', '5.1.1'),
('toolz', '0.9.0')],
'optional': [('numpy', '1.15.3'),
('pandas', '0.23.4'),
('bokeh', '1.0.0'),
('lz4', None),
('dask_ml', None),
('blosc', None)]}}}
Thanks in advance!