Skip to content

Commit

Permalink
fixes #59 (#62)
Browse files Browse the repository at this point in the history
  • Loading branch information
akmorrow13 authored Feb 2, 2021
1 parent a1cf3ac commit d1e6b97
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions epitome/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ def __init__(self,

# load in specs for data
self.full_matrix, self.cellmap, self.targetmap = EpitomeDataset.get_assays(targets = targets,
cells = cells,
data_dir = self.data_dir,
min_cells_per_target = self.min_cells_per_target,
min_targets_per_cell = self.min_targets_per_cell,
similarity_targets = similarity_targets)
cells = cells,
data_dir = self.data_dir,
min_cells_per_target = self.min_cells_per_target,
min_targets_per_cell = self.min_targets_per_cell,
similarity_targets = similarity_targets)


# make a truncated matrix that includes updated indices for rows containing data from cellmap, targetmap
Expand Down Expand Up @@ -159,7 +159,16 @@ def get_data(self, mode):
i = np.empty_like(order)
i[order] = np.arange(order.size)

self._data = dataset['data'][self.row_indices[order],:][i,:]
# Indexing load time is about 1s per row.
# Because it takes about 1min to load all of the data into memory,
# it is quicker to load all of the data into memory when you are
# accessing more than 60 rows (the break-even point: 60 rows * ~1s/row).
if order.shape[0] > 60:
# faster to just load the whole thing into memory then subselect
self._data = dataset['data'][:,:][self.row_indices[order],:][i,:]
else:
self._data = dataset['data'][self.row_indices[order],:][i,:]

dataset.close()

if mode == Dataset.ALL:
Expand Down

0 comments on commit d1e6b97

Please sign in to comment.