Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix mat support #1337

Merged
merged 4 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 26 additions & 28 deletions caiman/base/movies.py
Original file line number Diff line number Diff line change
Expand Up @@ -1244,7 +1244,8 @@ def load(file_name: Union[str, list[str]],
dimension of the movie along x and y if loading from a two dimensional numpy array

var_name_hdf5: str
if loading from hdf5/n5 name of the dataset inside the file to load (ignored if the file only has one dataset)
if loading from hdf5/n5 name of the dataset inside the file to load (ignored if the file only has one dataset).
This is also used for (new-style) mat files

in_memory: bool=False
This changes the behaviour of the function for npy files to be a readwrite rather than readonly memmap,
Expand Down Expand Up @@ -1314,17 +1315,6 @@ def load(file_name: Union[str, list[str]],
basename, extension = os.path.splitext(file_name)

extension = extension.lower()
if extension == '.mat':
logging.warning('Loading a *.mat file. x- and y- dimensions ' +
'might have been swapped.')
try: # scipy >= 1.8
byte_stream, file_opened = scipy.io.matlab._mio._open_file(file_name, appendmat=False)
mjv, mnv = scipy.io.matlab.miobase.get_matfile_version(byte_stream)
except: # scipy <= 1.7
byte_stream, file_opened = scipy.io.matlab.mio._open_file(file_name, appendmat=False)
mjv, mnv = scipy.io.matlab.mio.get_matfile_version(byte_stream)
if mjv == 2:
extension = '.h5'

if extension in ['.tif', '.tiff', '.btf']: # load tif file
with tifffile.TiffFile(file_name) as tffl:
Expand Down Expand Up @@ -1512,23 +1502,23 @@ def load(file_name: Union[str, list[str]],
else:
input_arr = input_arr[np.newaxis, :, :]

elif extension == '.mat': # load npy file
input_arr = scipy.io.loadmat(file_name)['data']
input_arr = np.rollaxis(input_arr, 2, -3)
if subindices is not None:
input_arr = input_arr[subindices]

elif extension == '.npz': # load movie from saved file
if subindices is not None:
raise Exception('Subindices not implemented')
with np.load(file_name) as f:
return movie(**f).astype(outtype)

elif extension in ('.hdf5', '.h5', '.nwb', 'n5', 'zarr'):
elif extension in ('.hdf5', '.h5', '.mat', '.nwb', 'n5', 'zarr'):
if extension in ('n5', 'zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
f = h5py.File(file_name, "r")
try:
f = h5py.File(file_name, "r")
except:
if extension == '.mat':
raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
else:
raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed. Sync with get_file_size() !!
fkeys = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(fkeys) == 1: # If the file we're parsing has only one dataset inside it,
Expand Down Expand Up @@ -1955,7 +1945,13 @@ def load_iter(file_name: Union[str, list[str]], subindices=None, var_name_hdf5:
if extension in ('.n5', '.zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
f = h5py.File(file_name, "r")
try:
f = h5py.File(file_name, "r")
except:
if extension == '.mat':
raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
else:
raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed.
fkeys = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(fkeys) == 1: # If the hdf5 file we're parsing has only one dataset inside it,
Expand Down Expand Up @@ -2010,11 +2006,7 @@ def get_file_size(file_name, var_name_hdf5:str='mov') -> tuple[tuple, Union[int,
if os.path.exists(file_name):
_, extension = os.path.splitext(file_name)[:2]
extension = extension.lower()
if extension == '.mat':
byte_stream, file_opened = scipy.io.matlab.mio._open_file(file_name, appendmat=False)
mjv, mnv = scipy.io.matlab.mio.get_matfile_version(byte_stream)
if mjv == 2:
extension = '.h5'

if extension in ['.tif', '.tiff', '.btf']:
tffl = tifffile.TiffFile(file_name)
siz = tffl.series[0].shape
Expand Down Expand Up @@ -2042,12 +2034,18 @@ def get_file_size(file_name, var_name_hdf5:str='mov') -> tuple[tuple, Union[int,
filename = os.path.split(file_name)[-1]
Yr, dims, T = caiman.mmapping.load_memmap(os.path.join(
os.path.split(file_name)[0], filename))
elif extension in ('.h5', '.hdf5', '.nwb', 'n5', 'zarr'):
elif extension in ('.h5', '.hdf5', '.mat', '.nwb', 'n5', 'zarr'):
# FIXME this doesn't match the logic in load()
if extension in ('n5', 'zarr'): # Thankfully, the zarr library lines up closely with h5py past the initial open
f = zarr.open(file_name, "r")
else:
f = h5py.File(file_name, "r")
try:
f = h5py.File(file_name, "r")
except:
if extension == '.mat':
raise Exception(f"Problem loading {file_name}: Unknown format. This may be in the original version 1 (non-hdf5) mat format; please convert it first")
else:
raise Exception(f"Problem loading {file_name}: Unknown format.")
ignore_keys = ['__DATA_TYPES__'] # Known metadata that tools provide, add to this as needed. Sync with movies.my:load() !!
kk = list(filter(lambda x: x not in ignore_keys, f.keys()))
if len(kk) == 1: # TODO: Consider recursing into a group to find a dataset
Expand Down
2 changes: 1 addition & 1 deletion docs/source/file_formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Notes for specific formats follow:

Matlab (\*.mat)
---------------
Caiman may be able to handle Matlab's mat files, although it is not a desirable format. We rely on the scipy.io.matlab functions to handle files of this type, except for certain versions of the format which are actually hdf5 files
Caiman can handle Matlab's V2 mat files (which are HDF5 files in a certain format). If you need old-style support, reach out and we can consider adding it.

TIFF (\*.tiff, \*.tif, \*.btf)
------------------------------
Expand Down
Loading