🐛 Fix Errors in the slidegraph Example Notebook (#608)

measty · shaneahmed · web-flow · commit e4deac4ab53a · 2023-06-23T12:52:22.000+01:00
Fixes some issues with the slidegraph notebooks.

1. Updates due to changes in STRtree in recent shapely versions
2. In the 'cell-composition' mode, add the 'filter_coordinates' step so that the mask is considered when generating graph nodes. Also made a small tweak to the mask filter so that the mask doesn't have to be single-channel.
3. Fix the resolution of the plots being wrong when not using the pre-generated model
4. Fixes for a couple of datatype-related issues, which may have crept in at some point due to numpy or torch version changes.

I have also added a note to explain that the last few cells of the inference notebook are for composition features only, as pretrained model weights are only available for that mode.
---------

Co-authored-by: Shan E Ahmed Raza <13048456+shaneahmed@users.noreply.github.com>
diff --git a/examples/full-pipelines/slide-graph.ipynb b/examples/full-pipelines/slide-graph.ipynb
@@ -638,6 +638,7 @@
     "\n",
     "def get_cell_compositions(\n",
     "    wsi_path: str,\n",
+    "    mask_path: str,\n",
     "    inst_pred_path: str,\n",
     "    save_dir: str,\n",
     "    num_types: int = 6,\n",
@@ -662,8 +663,6 @@
     "    inst_boxes = np.array(inst_boxes)\n",
     "\n",
     "    geometries = [shapely_box(*bounds) for bounds in inst_boxes]\n",
-    "    # An auxiliary dictionary to actually query the index within the source list\n",
-    "    index_by_id = {id(geo): idx for idx, geo in enumerate(geometries)}\n",
     "    spatial_indexer = STRtree(geometries)\n",
     "\n",
     "    # * Generate patch coordinates (in xy format)\n",
@@ -676,21 +675,30 @@
     "        stride_shape=stride_shape,\n",
     "    )\n",
     "\n",
+    "    # filter out coords which dont lie in mask\n",
+    "    selected_coord_indices = PatchExtractor.filter_coordinates(\n",
+    "        WSIReader.open(mask_path),\n",
+    "        patch_inputs,\n",
+    "        wsi_shape=wsi_shape,\n",
+    "        min_mask_ratio=0.5,\n",
+    "    )\n",
+    "    patch_inputs = patch_inputs[selected_coord_indices]\n",
+    "\n",
     "    bounds_compositions = []\n",
     "    for bounds in patch_inputs:\n",
     "        bounds_ = shapely_box(*bounds)\n",
     "        indices = [\n",
-    "            index_by_id[id(geo)]\n",
+    "            geo\n",
     "            for geo in spatial_indexer.query(bounds_)\n",
-    "            if bounds_.contains(geo)\n",
+    "            if bounds_.contains(geometries[geo])\n",
     "        ]\n",
     "        insts = [inst_pred[v][\"type\"] for v in indices]\n",
     "        uids, freqs = np.unique(insts, return_counts=True)\n",
     "        # A bound may not contain all types, hence, to sync\n",
     "        # the array and placement across all types, we create\n",
     "        # a holder then fill the count within.\n",
     "        holder = np.zeros(num_types, dtype=np.int16)\n",
-    "        holder[uids] = freqs\n",
+    "        holder[uids.astype(int)] = freqs\n",
     "        bounds_compositions.append(holder)\n",
     "    bounds_compositions = np.array(bounds_compositions)\n",
     "\n",
@@ -706,8 +714,11 @@
     "    inst_segmentor = NucleusInstanceSegmentor(\n",
     "        pretrained_model=\"hovernet_fast-pannuke\",\n",
     "        batch_size=16,\n",
-    "        num_postproc_workers=2,\n",
+    "        num_postproc_workers=4,\n",
+    "        num_loader_workers=4,\n",
     "    )\n",
+    "    # bigger tile shape for postprocessing performance\n",
+    "    inst_segmentor.ioconfig.tile_shape = (4000, 4000)\n",
     "    # Injecting customized preprocessing functions,\n",
     "    # check the document or sample codes below for API\n",
     "    inst_segmentor.model.preproc_func = preproc_func\n",
@@ -735,7 +746,7 @@
     "\n",
     "    # TODO: parallelize this later if possible\n",
     "    for idx, path in enumerate(output_paths):\n",
-    "        get_cell_compositions(wsi_paths[idx], path, save_dir)\n",
+    "        get_cell_compositions(wsi_paths[idx], msk_paths[idx], path, save_dir)\n",
     "    return output_paths"
    ]
   },
@@ -1035,7 +1046,7 @@
    "outputs": [],
    "source": [
     "NODE_SIZE = 24\n",
-    "NODE_RESOLUTION = dict(resolution=0.5, units=\"mpp\")\n",
+    "NODE_RESOLUTION = dict(resolution=0.25, units=\"mpp\")\n",
     "PLOT_RESOLUTION = dict(resolution=4.0, units=\"mpp\")"
    ]
   },
@@ -1077,7 +1088,7 @@
     "plot_resolution = reader.slide_dimensions(**PLOT_RESOLUTION)\n",
     "fx = np.array(node_resolution) / np.array(plot_resolution)\n",
     "\n",
-    "node_coordinates = np.array(graph.coords) / fx\n",
+    "node_coordinates = np.array(graph.coordinates) / fx\n",
     "edges = graph.edge_index.T\n",
     "\n",
     "thumb = reader.slide_thumbnail(**PLOT_RESOLUTION)\n",
@@ -2458,7 +2469,7 @@
     "\n",
     "NODE_SIZE = 25\n",
     "NUM_NODE_FEATURES = 4\n",
-    "NODE_RESOLUTION = dict(resolution=0.5, units=\"mpp\")\n",
+    "NODE_RESOLUTION = dict(resolution=0.25, units=\"mpp\")\n",
     "PLOT_RESOLUTION = dict(resolution=4.0, units=\"mpp\")\n",
     "\n",
     "node_scaler = joblib.load(SCALER_PATH)\n",
@@ -2503,7 +2514,7 @@
     "cmap = plt.get_cmap(\"inferno\")\n",
     "graph = graph.to(\"cpu\")\n",
     "\n",
-    "node_coordinates = np.array(graph.coords) / fx\n",
+    "node_coordinates = np.array(graph.coordinates) / fx\n",
     "node_colors = (cmap(np.squeeze(node_activations))[..., :3] * 255).astype(np.uint8)\n",
     "edges = graph.edge_index.T\n",
     "\n",
diff --git a/examples/inference-pipelines/slide-graph.ipynb b/examples/inference-pipelines/slide-graph.ipynb
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -1,5 +1,5 @@
 # torch installation
---extra-index-url https://download.pytorch.org/whl/cu117; sys_platform != "darwin"
+--extra-index-url https://download.pytorch.org/whl/cu118; sys_platform != "darwin"
 albumentations>=1.3.0
 Click>=8.1.3
 defusedxml>=0.7.1
diff --git a/tiatoolbox/tools/graph.py b/tiatoolbox/tools/graph.py
@@ -395,7 +395,7 @@ def build(
 
         return {
             "x": feature_centroids,
-            "edge_index": edge_index,
+            "edge_index": edge_index.astype(np.int64),
             "coordinates": point_centroids,
         }
 
diff --git a/tiatoolbox/tools/patchextraction.py b/tiatoolbox/tools/patchextraction.py
@@ -284,7 +284,7 @@ def filter_coordinates(
         tissue_mask = mask_reader.img
 
         # Scaling the coordinates_list to the `tissue_mask` array resolution
-        scale_factors = np.array(tissue_mask.shape[::-1]) / np.array(wsi_shape)
+        scale_factors = np.array(tissue_mask.shape[1::-1]) / np.array(wsi_shape)
         scaled_coords = coordinates_list.copy().astype(np.float32)
         scaled_coords[:, [0, 2]] *= scale_factors[0]
         scaled_coords[:, [0, 2]] = np.clip(

Original file line number	Diff line number	Diff line change
`@@ -395,7 +395,7 @@ def build(`
`395`	`395`
`396`	`396`	`return {`
`397`	`397`	`"x": feature_centroids,`
`398`		`- "edge_index": edge_index,`
	`398`	`+ "edge_index": edge_index.astype(np.int64),`
`399`	`399`	`"coordinates": point_centroids,`
`400`	`400`	`}`
`401`	`401`