Fix strides issues over FITS arrays and base arrays with negative strides #930
Diff of the first changed file:
@@ -92,7 +92,7 @@ def _array_fromfile(fd, size):
     """


-def _array_tofile_chunked(write, array, chunksize):  # pragma: no cover
+def _array_tofile_chunked(write, array, chunksize):
    array = array.view(np.uint8)
    for i in range(0, array.nbytes, chunksize):
        write(array[i:i + chunksize].data)
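Since _array_tofile_chunked reinterprets the array as raw bytes via array.view(np.uint8), it quietly assumes the data handed to it is contiguous. A small standalone illustration (not part of the diff) of why that assumption matters:

import numpy as np

a = np.arange(8, dtype=np.float64)          # 1D, C-contiguous
print(a.view(np.uint8).nbytes)              # 64: reinterpreting contiguous memory as bytes is fine

b = np.arange(8, dtype=np.float64)[::2]     # 1D but strided, not contiguous
try:
    b.view(np.uint8)
except ValueError as err:
    print("cannot byte-view a strided array:", err)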
@@ -101,8 +101,7 @@ def _array_tofile_chunked(write, array, chunksize):  # pragma: no cover
 def _array_tofile_simple(fd, write, array):
     return write(array.data)

-
-if sys.platform == 'darwin':  # pragma: no cover
+if sys.platform == 'darwin':
Review discussion:
- Odd to have this logic at the module level. Wouldn't it be clearer to have the logic in the calling function? Or is there more than one calling function?
- I don't know for sure, but I suspect whoever wrote this was trying to be efficient by doing the check once at import time instead of on every call.
     def _array_tofile(fd, write, array):
         # This value is currently set as a workaround for a known bug in Python
         # on OSX. Individual writes must be less than 2GB, which necessitates
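For readers unfamiliar with the pattern raised in the review comments above, here is a generic sketch (not asdf code; the function name and limits are made up) of import-time dispatch: the platform check runs once when the module loads and binds an implementation, so individual calls skip the sys.platform comparison.

import sys

if sys.platform == 'darwin':
    def _write_limit():
        return 2 ** 31 - 4096      # hypothetical macOS per-write limit
elif sys.platform.startswith('win'):
    def _write_limit():
        return 2 ** 30             # hypothetical Windows per-write limit
else:
    def _write_limit():
        return None                # hypothetical: no limit needed elsewhere

# Every call uses the implementation chosen at import time:
print(_write_limit())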
@@ -112,7 +111,7 @@ def _array_tofile(fd, write, array):
         if fd is None or array.nbytes >= OSX_WRITE_LIMIT and array.nbytes % 4096 == 0:
             return _array_tofile_chunked(write, array, OSX_WRITE_LIMIT)
         return _array_tofile_simple(fd, write, array)
-elif sys.platform.startswith('win'):  # pragma: no cover
+elif sys.platform.startswith('win'):
     def _array_tofile(fd, write, array):
         WIN_WRITE_LIMIT = 2 ** 30
         return _array_tofile_chunked(write, array, WIN_WRITE_LIMIT)
Review discussion (on lines 111 to 117):
- That is a bit strange. I'll move those outside the functions.
- Actually, I'm going to tackle this in a separate PR. These workarounds seem to no longer be necessary, but this PR will be included in asdf 2.7.3 and I'd rather drop them in 2.8.
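As a rough sketch of what the chunked path does (assumed helper and buffer names; only the standard library and numpy are used), a large buffer is viewed as bytes and written in pieces so that no single write exceeds the platform limit:

import io
import numpy as np

def tofile_chunked(write, array, chunksize):
    data = array.view(np.uint8)                 # raw bytes of the (contiguous) array
    for i in range(0, data.nbytes, chunksize):
        write(data[i:i + chunksize].data)       # each call writes at most chunksize bytes

buf = io.BytesIO()
arr = np.arange(10, dtype=np.uint32)            # 40 bytes total
tofile_chunked(buf.write, arr, chunksize=16)    # performed as 16 + 16 + 8 byte writes
print(len(buf.getvalue()))                      # 40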
@@ -370,7 +369,20 @@ def write(self, content):
         """

     def write_array(self, array):
-        _array_tofile(None, self.write, array.ravel(order='K'))
+        """
+        Write array content to the file.  Array must be 1D contiguous
+        so that this method can avoid making assumptions about the
+        intended memory layout.  Endianness is preserved.
+
+        Parameters
+        ----------
+        array : np.ndarray
+            Must be 1D contiguous.
+        """
+        if len(array.shape) != 1 or not array.flags.contiguous:
+            raise ValueError("Requires 1D contiguous array.")
+
+        _array_tofile(None, self.write, array)

     def seek(self, offset, whence=0):
Review discussion:
- This is the first time I've seen ...
- That's funny, I didn't notice that. Seems to be a bit of a tradition.
- 😅 Shows you how much I paid attention in my C programming!
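Stepping back to the write_array change above: the practical effect of the new check is that callers, not write_array itself, decide how an array's memory gets linearized. A hedged illustration (the fd object and its write_array method are stand-ins for whatever file wrapper exposes this API):

import numpy as np

data = np.arange(12).reshape(3, 4)[:, ::2]    # a non-contiguous 2D view

# Previously write_array called data.ravel(order='K') internally; now the caller
# makes the layout explicit before handing the buffer over:
flat = np.ascontiguousarray(data).ravel()     # copy into C order, then flatten

assert flat.ndim == 1 and flat.flags.contiguous
# fd.write_array(flat)   # passes the new check
# fd.write_array(data)   # raises ValueError("Requires 1D contiguous array.")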
@@ -749,7 +761,10 @@ def write_array(self, arr):
             arr.flush()
             self.fast_forward(len(arr.data))
         else:
-            _array_tofile(self._fd, self._fd.write, arr.ravel(order='K'))
+            if len(arr.shape) != 1 or not arr.flags.contiguous:
+                raise ValueError("Requires 1D contiguous array.")
+
+            _array_tofile(self._fd, self._fd.write, arr)

     def can_memmap(self):
         return True
Diff of the second changed file:
@@ -81,7 +81,10 @@ def asdf_datatype_to_numpy_dtype(datatype, byteorder=None):
     raise ValueError("Unknown datatype {0}".format(datatype))


-def numpy_byteorder_to_asdf_byteorder(byteorder):
+def numpy_byteorder_to_asdf_byteorder(byteorder, override=None):
+    if override is not None:
+        return override
+
     if byteorder == '=':
         return sys.byteorder
     elif byteorder == '<':
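For reference, a standalone restatement (not the asdf source) of the mapping this helper performs, including the new override hook that lets a caller force a particular byte order:

import sys

def byteorder_to_asdf(byteorder, override=None):
    if override is not None:
        return override          # e.g. force 'big' for FITS-backed blocks
    if byteorder == '=':
        return sys.byteorder     # native order of the running interpreter
    elif byteorder == '<':
        return 'little'
    return 'big'                 # '>' (and anything else here) maps to big-endian

print(byteorder_to_asdf('<'))                  # little
print(byteorder_to_asdf('<', override='big'))  # big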
@@ -90,38 +93,38 @@ def numpy_byteorder_to_asdf_byteorder(byteorder):
         return 'big'


-def numpy_dtype_to_asdf_datatype(dtype, include_byteorder=True):
+def numpy_dtype_to_asdf_datatype(dtype, include_byteorder=True, override_byteorder=None):
     dtype = np.dtype(dtype)
     if dtype.names is not None:
         fields = []
         for name in dtype.names:
             field = dtype.fields[name][0]
             d = {}
             d['name'] = name
-            field_dtype, byteorder = numpy_dtype_to_asdf_datatype(field)
+            field_dtype, byteorder = numpy_dtype_to_asdf_datatype(field, override_byteorder=override_byteorder)
             d['datatype'] = field_dtype
             if include_byteorder:
                 d['byteorder'] = byteorder
             if field.shape:
                 d['shape'] = list(field.shape)
             fields.append(d)
-        return fields, numpy_byteorder_to_asdf_byteorder(dtype.byteorder)
+        return fields, numpy_byteorder_to_asdf_byteorder(dtype.byteorder, override=override_byteorder)

     elif dtype.subdtype is not None:
-        return numpy_dtype_to_asdf_datatype(dtype.subdtype[0])
+        return numpy_dtype_to_asdf_datatype(dtype.subdtype[0], override_byteorder=override_byteorder)

     elif dtype.name in _datatype_names:
-        return dtype.name, numpy_byteorder_to_asdf_byteorder(dtype.byteorder)
+        return dtype.name, numpy_byteorder_to_asdf_byteorder(dtype.byteorder, override=override_byteorder)

     elif dtype.name == 'bool':
-        return 'bool8', numpy_byteorder_to_asdf_byteorder(dtype.byteorder)
+        return 'bool8', numpy_byteorder_to_asdf_byteorder(dtype.byteorder, override=override_byteorder)

     elif dtype.name.startswith('string') or dtype.name.startswith('bytes'):
         return ['ascii', dtype.itemsize], 'big'

     elif dtype.name.startswith('unicode') or dtype.name.startswith('str'):
         return (['ucs4', int(dtype.itemsize / 4)],
-                numpy_byteorder_to_asdf_byteorder(dtype.byteorder))
+                numpy_byteorder_to_asdf_byteorder(dtype.byteorder, override=override_byteorder))

     raise ValueError("Unknown dtype {0}".format(dtype))
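A hedged usage sketch of the new override_byteorder parameter; the import path is an assumption about where this module lives, and the expected results follow from the code shown above:

import numpy as np
from asdf.tags.core.ndarray import numpy_dtype_to_asdf_datatype  # assumed import path

# Little-endian float64, byte order reported as-is:
print(numpy_dtype_to_asdf_datatype(np.dtype('<f8')))
# expected: ('float64', 'little')

# Same dtype, but the byte order forced to 'big' (the FITS case):
print(numpy_dtype_to_asdf_datatype(np.dtype('<f8'), override_byteorder='big'))
# expected: ('float64', 'big')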
@@ -408,32 +411,61 @@ def reserve_blocks(cls, data, ctx):

     @classmethod
     def to_tree(cls, data, ctx):
+        # The ndarray-1.0.0 schema does not permit 0 valued strides.
+        # Perhaps we'll want to allow this someday, to efficiently
+        # represent an array of all the same value.
Review discussion (on lines +414 to +416):
- Isn't the ...
- We haven't created ...
- Doh! Nevermind. I guess this would be a candidate feature of 1.1.0. =)
+        if any(stride == 0 for stride in data.strides):
+            data = np.ascontiguousarray(data)
+
-        base = util.get_array_base(data)
         shape = data.shape
         dtype = data.dtype
-        offset = data.ctypes.data - base.ctypes.data

-        if data.flags.c_contiguous:
+        block = ctx.blocks.find_or_create_block_for_array(data, ctx)
+
+        if block.array_storage == "fits":
+            # Views over arrays stored in FITS files have some idiosyncracies.
+            # astropy.io.fits always writes arrays C-contiguous with big-endian
+            # byte order, whereas asdf preserves the "contiguousity" and byte order
+            # of the base array.
+            if (block.data.shape != data.shape or
+                    block.data.dtype.itemsize != data.dtype.itemsize or
+                    block.data.ctypes.data != data.ctypes.data or
+                    block.data.strides != data.strides):
+                raise ValueError(
+                    "ASDF has only limited support for serializing views over arrays stored "
+                    "in FITS HDUs.  This error likely means that a slice of such an array "
+                    "was found in the ASDF tree.  The slice can be decoupled from the FITS "
+                    "array by calling copy() before assigning it to the tree."
+                )
+
+            offset = 0
             strides = None
+            dtype, byteorder = numpy_dtype_to_asdf_datatype(
+                data.dtype,
+                include_byteorder=(block.array_storage != "inline"),
+                override_byteorder="big",
+            )
         else:
-            strides = data.strides
+            base = util.get_array_base(data)
+            # Compute the offset relative to the base array and not the
+            # block data, in case the block is compressed.
+            offset = data.ctypes.data - base.ctypes.data

-        block = ctx.blocks.find_or_create_block_for_array(data, ctx)
+            if data.flags.c_contiguous:
+                strides = None
+            else:
+                strides = data.strides
+
+            dtype, byteorder = numpy_dtype_to_asdf_datatype(
+                data.dtype,
+                include_byteorder=(block.array_storage != "inline"),
+            )

         result = {}

         result['shape'] = list(shape)
         if block.array_storage == 'streamed':
             result['shape'][0] = '*'

-        dtype, byteorder = numpy_dtype_to_asdf_datatype(
-            dtype, include_byteorder=(block.array_storage != 'inline'))
-
-        byteorder = block.override_byteorder(byteorder)
-
         if block.array_storage == 'inline':
             listdata = numpy_array_to_list(data)
             result['data'] = listdata
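To make the zero-stride comment concrete, here is a small illustration (plain numpy, not asdf) of the kind of array the new code copies before serializing:

import numpy as np

tiled = np.broadcast_to(np.arange(3), (4, 3))   # repeats a row without copying it
print(tiled.strides)                            # e.g. (0, 8): the first stride is 0

# to_tree falls back to an ordinary contiguous copy, so the ndarray-1.0.0
# schema never has to represent a 0 stride:
materialized = np.ascontiguousarray(tiled)
print(materialized.strides)                     # e.g. (24, 8) for 8-byte integers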
Review discussion:
- Why removed, and what was this ever supposed to do?
- FITS always writes arrays in big-endian byte order, so we needed a mechanism to tell NDArrayType to ignore the byte order reported by the base array. This logic has been moved inside NDArrayType itself.
- Glad to see the logic moved inside the class itself.
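Finally, a hedged sketch of the workaround named in the new error message, using the asdf 2.x fits_embed API (the file layout and tree key here are invented for the example): a slice of a FITS-backed array is decoupled by copying it before it goes into the tree.

import numpy as np
from astropy.io import fits
from asdf import fits_embed

hdulist = fits.HDUList([fits.PrimaryHDU(np.arange(100.0).reshape(10, 10))])
af = fits_embed.AsdfInFits(hdulist, tree={})

# af.tree['subset'] = hdulist[0].data[:5]         # a view over the FITS array;
#                                                 # expected to hit the new ValueError on write
af.tree['subset'] = hdulist[0].data[:5].copy()    # an independent copy serializes normally
# af.write_to('example.fits')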