Misc: Spatial-query, kerchunk, and OME helper functions (#360)

* Misc changes * Bigtiff helper for rgb * Remove unused import * Lint * Unused variables
vitessce · Aug 31, 2024 · 06a04e7 · 06a04e7
1 parent 55b3f57
commit 06a04e7
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 9 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -80,6 +80,8 @@ all = [
   'ujson>=4.0.1',
   'starlette==0.14.0',
   'generate-tiff-offsets>=0.1.7',
+  'kerchunk>=0.2.6',
+  'fsspec>=2023.12.2',
 
   # aiofiles is not explicitly referenced in our code,
   # but it is an implicit dependency of starlette==0.14.0.

diff --git a/vitessce/data_utils/anndata.py b/vitessce/data_utils/anndata.py
@@ -6,6 +6,17 @@
 VAR_CHUNK_SIZE = 10
 
 
+def generate_h5ad_ref_spec(h5_url, omit_url=True):
+    """Translate the HDF5 file at h5_url with kerchunk; if omit_url, set the first item of each list-valued ref to None."""
+    from kerchunk.hdf import SingleHdf5ToZarr
+    ref_spec = SingleHdf5ToZarr(h5_url, inline_threshold=300).translate()
+    if not omit_url:
+        return ref_spec
+    refs = ref_spec['refs']  # Only list-valued entries are rewritten below (first item is presumably the URL).
+    refs.update({key: [None, *val[1:]] for key, val in refs.items() if isinstance(val, list)})
+    return ref_spec
+
+
 def cast_arr(arr):
     """
     Try to cast an array to a dtype that takes up less space.

diff --git a/vitessce/data_utils/ome.py b/vitessce/data_utils/ome.py
@@ -5,6 +5,21 @@
 from .anndata import cast_arr
 
 
+def needs_bigtiff(img_arr_shape):
+    """
+    Helper function to determine if an image array is too large for standard TIFF format.
+
+    :param img_arr_shape: The shape of the image array (an array-like object with ``.shape`` is also accepted).
+    :type img_arr_shape: tuple[int]
+    :return: True if the number of array elements exceeds 2**32 (element byte size is not considered), False otherwise.
+    :rtype: bool
+    """
+    num_pixels = 1
+    for n in getattr(img_arr_shape, "shape", img_arr_shape):  # Callers pass a plain shape tuple, which has no .shape.
+        num_pixels *= n
+    return (num_pixels > 2**32)
+
+
 def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"):
     """
     Convert an RGB image to OME-TIFF.
@@ -16,8 +31,9 @@ def rgb_img_to_ome_tiff(img_arr, output_path, img_name="Image", axes="CYX"):
     :param str axes: The array axis ordering. By default, "CYX"
     """
     img_arr = img_arr.astype(np.dtype('uint8'))
+    bigtiff = needs_bigtiff(img_arr.shape)
 
-    tiff_writer = TiffWriter(output_path, ome=True)
+    tiff_writer = TiffWriter(output_path, ome=True, bigtiff=bigtiff)
     tiff_writer.write(
         img_arr,
         metadata={
@@ -38,7 +54,9 @@ def multiplex_img_to_ome_tiff(img_arr, channel_names, output_path, axes="CYX"):
     :param str output_path: The path to save the Zarr store.
     :param str axes: The array axis ordering. By default, "CYX"
     """
-    tiff_writer = TiffWriter(output_path, ome=True)
+    bigtiff = needs_bigtiff(img_arr.shape)
+
+    tiff_writer = TiffWriter(output_path, ome=True, bigtiff=bigtiff)
     tiff_writer.write(
         img_arr,
         metadata={

diff --git a/vitessce/widget_plugins/spatial_query.py b/vitessce/widget_plugins/spatial_query.py
@@ -213,7 +213,10 @@ def fp_tree_to_obs_sets_tree(self, fp_tree, sq_id):
         obs_set_color = []
 
         for row_i, row in fp_tree.iterrows():
-            motif = row["itemsets"]
+            try:
+                motif = row["itemsets"]
+            except KeyError:
+                motif = row["motifs"]
             cell_i = row["cell_id"]
 
             motif_name = str(list(motif))
@@ -248,32 +251,45 @@ def run_sq(self, prev_config):
 
         max_dist = query_params.get("maxDist", 150)
         min_size = query_params.get("minSize", 4)
-        min_count = query_params.get("minCount", 10)
+        # min_count = query_params.get("minCount", 10)
         min_support = query_params.get("minSupport", 0.5)
-        dis_duplicates = query_params.get("disDuplicates", False)  # if distinguish duplicates of cell types in neighborhood
+        # dis_duplicates = query_params.get("disDuplicates", False)  # if distinguish duplicates of cell types in neighborhood
         query_type = query_params.get("queryType", "grid")
+        cell_type_of_interest = query_params.get("cellTypeOfInterest", None)
 
         query_uuid = query_params["uuid"]
 
         params_dict = dict(
             max_dist=max_dist,
             min_size=min_size,
-            min_count=min_count,
+            # min_count=min_count,
             min_support=min_support,
-            dis_duplicates=dis_duplicates,
+            # dis_duplicates=dis_duplicates,
             if_display=True,
             fig_size=(9, 6),
             return_cellID=True,
         )
         print(params_dict)
 
+        # TODO: add unit tests for this functionality
+
         if query_type == "rand":
             # TODO: implement param similar to return_grid for find_patterns_rand (to return the random points used)
             fp_tree = self.tt.find_patterns_rand(**params_dict)
         elif query_type == "grid":
             params_dict["return_grid"] = True
             fp_tree, grid_pos = self.tt.find_patterns_grid(**params_dict)
-        # TODO: support query_type == "ct-center"
+        elif query_type == "ct-center":
+            fp_tree = self.tt.motif_enrichment_knn(
+                ct=cell_type_of_interest,
+                k=20,  # TODO: make this a parameter in the UI.
+                min_support=min_support,
+                # dis_duplicates=dis_duplicates,
+                return_cellID=True,
+            )
+            print(fp_tree)
+
+        # TODO: implement query types that are dependent on motif selection.
 
         # Previous values
         additional_obs_sets = prev_config["coordinationSpace"]["additionalObsSets"]["A"]
@@ -292,7 +308,9 @@ def run_sq(self, prev_config):
         new_obs_set_selection = [[new_additional_obs_sets["tree"][0]["name"], motif_to_select, node["name"]] for node in new_additional_obs_sets["tree"][0]["children"][0]["children"]]
         prev_config["coordinationSpace"]["obsSetSelection"]["A"] = new_obs_set_selection
 
-        # TODO: set obsSetExpansion
+        # TODO: need to fix bug that prevents this from working
+        # Reference: https://github.com/vitessce/vitessce/blob/774328ab5c4436576dd2e8e4fff0758d6c6cce89/packages/view-types/obs-sets-manager/src/ObsSetsManagerSubscriber.js#L104
+        prev_config["coordinationSpace"]["obsSetExpansion"]["A"] = [path[:-1] for path in new_obs_set_selection]
 
         return {**prev_config, "uid": f"with_query_{query_uuid}"}