From 06a04e7dfdb83965722790f5599389074de12143 Mon Sep 17 00:00:00 2001 From: Mark Keller <7525285+keller-mark@users.noreply.github.com> Date: Sat, 31 Aug 2024 18:35:17 -0400 Subject: [PATCH] Misc: Spatial-query, kerchunk, and OME helper functions (#360) * Misc changes * Bigtiff helper for rgb * Remove unused import * Lint * Unused variables --- pyproject.toml | 2 ++ vitessce/data_utils/anndata.py | 11 ++++++++ vitessce/data_utils/ome.py | 22 ++++++++++++++-- vitessce/widget_plugins/spatial_query.py | 32 ++++++++++++++++++------ 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e3bc7f44..3e2a1ee2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,8 @@ all = [ 'ujson>=4.0.1', 'starlette==0.14.0', 'generate-tiff-offsets>=0.1.7', + 'kerchunk>=0.2.6', + 'fsspec>=2023.12.2', # aiofiles is not explicitly referenced in our code, # but it is an implicit dependency of starlette==0.14.0. diff --git a/vitessce/data_utils/anndata.py b/vitessce/data_utils/anndata.py index a9743670..648fc85d 100644 --- a/vitessce/data_utils/anndata.py +++ b/vitessce/data_utils/anndata.py @@ -6,6 +6,17 @@ VAR_CHUNK_SIZE = 10 +def generate_h5ad_ref_spec(h5_url, omit_url=True): + from kerchunk.hdf import SingleHdf5ToZarr + h5chunks = SingleHdf5ToZarr(h5_url, inline_threshold=300) + h5dict = h5chunks.translate() + if omit_url: + for key, val in h5dict['refs'].items(): + if isinstance(val, list): + h5dict['refs'][key] = [None, *val[1:]] + return h5dict + + def cast_arr(arr): """ Try to cast an array to a dtype that takes up less space. diff --git a/vitessce/data_utils/ome.py b/vitessce/data_utils/ome.py index 633d48b3..75a0272b 100644 --- a/vitessce/data_utils/ome.py +++ b/vitessce/data_utils/ome.py @@ -5,6 +5,21 @@ from .anndata import cast_arr +def needs_bigtiff(img_arr_shape): + """ + Helper function to determine if an image array is too large for standard TIFF format. + + :param img_arr_shape: The shape of the image array. 
+ :type img_arr_shape: tuple[int] + :return: True if the image array is too large for standard TIFF format, False otherwise. + :rtype: bool + """ + num_pixels = 1 + for n in img_arr_shape: + num_pixels *= n + return (num_pixels > 2**32) + + def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"): """ Convert an RGB image to OME-TIFF. @@ -16,8 +31,9 @@ def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"): :param str axes: The array axis ordering. By default, "CYX" """ img_arr = img_arr.astype(np.dtype('uint8')) + bigtiff = needs_bigtiff(img_arr.shape) - tiff_writer = TiffWriter(output_path, ome=True) + tiff_writer = TiffWriter(output_path, ome=True, bigtiff=bigtiff) tiff_writer.write( img_arr, metadata={ @@ -38,7 +54,9 @@ def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"): :param str output_path: The path to save the Zarr store. :param str axes: The array axis ordering. By default, "CYX" """ - tiff_writer = TiffWriter(output_path, ome=True) + bigtiff = needs_bigtiff(img_arr.shape) + + tiff_writer = TiffWriter(output_path, ome=True, bigtiff=bigtiff) tiff_writer.write( img_arr, metadata={ diff --git a/vitessce/widget_plugins/spatial_query.py b/vitessce/widget_plugins/spatial_query.py index b5b4c75d..5ac589f4 100644 --- a/vitessce/widget_plugins/spatial_query.py +++ b/vitessce/widget_plugins/spatial_query.py @@ -213,7 +213,10 @@ def fp_tree_to_obs_sets_tree(self, fp_tree, sq_id): obs_set_color = [] for row_i, row in fp_tree.iterrows(): - motif = row["itemsets"] + try: + motif = row["itemsets"] + except KeyError: + motif = row["motifs"] cell_i = row["cell_id"] motif_name = str(list(motif)) @@ -248,32 +251,45 @@ def run_sq(self, prev_config): max_dist = query_params.get("maxDist", 150) min_size = query_params.get("minSize", 4) - min_count = query_params.get("minCount", 10) + # min_count = query_params.get("minCount", 10) min_support = query_params.get("minSupport", 0.5) - dis_duplicates = 
query_params.get("disDuplicates", False) # if distinguish duplicates of cell types in neighborhood + # dis_duplicates = query_params.get("disDuplicates", False) # if distinguish duplicates of cell types in neighborhood query_type = query_params.get("queryType", "grid") + cell_type_of_interest = query_params.get("cellTypeOfInterest", None) query_uuid = query_params["uuid"] params_dict = dict( max_dist=max_dist, min_size=min_size, - min_count=min_count, + # min_count=min_count, min_support=min_support, - dis_duplicates=dis_duplicates, + # dis_duplicates=dis_duplicates, if_display=True, fig_size=(9, 6), return_cellID=True, ) print(params_dict) + # TODO: add unit tests for this functionality + if query_type == "rand": # TODO: implement param similar to return_grid for find_patterns_rand (to return the random points used) fp_tree = self.tt.find_patterns_rand(**params_dict) elif query_type == "grid": params_dict["return_grid"] = True fp_tree, grid_pos = self.tt.find_patterns_grid(**params_dict) - # TODO: support query_type == "ct-center" + elif query_type == "ct-center": + fp_tree = self.tt.motif_enrichment_knn( + ct=cell_type_of_interest, + k=20, # TODO: make this a parameter in the UI. + min_support=min_support, + # dis_duplicates=dis_duplicates, + return_cellID=True, + ) + print(fp_tree) + + # TODO: implement query types that are dependent on motif selection. 
# Previous values additional_obs_sets = prev_config["coordinationSpace"]["additionalObsSets"]["A"] @@ -292,7 +308,9 @@ def run_sq(self, prev_config): new_obs_set_selection = [[new_additional_obs_sets["tree"][0]["name"], motif_to_select, node["name"]] for node in new_additional_obs_sets["tree"][0]["children"][0]["children"]] prev_config["coordinationSpace"]["obsSetSelection"]["A"] = new_obs_set_selection - # TODO: set obsSetExpansion + # TODO: need to fix bug that prevents this from working + # Reference: https://github.com/vitessce/vitessce/blob/774328ab5c4436576dd2e8e4fff0758d6c6cce89/packages/view-types/obs-sets-manager/src/ObsSetsManagerSubscriber.js#L104 + prev_config["coordinationSpace"]["obsSetExpansion"]["A"] = [path[:-1] for path in new_obs_set_selection] return {**prev_config, "uid": f"with_query_{query_uuid}"}