HaoyuCui
diff --git a/‎main.py
Lines changed: 6 additions & 6 deletions b/‎main.py
Lines changed: 6 additions & 6 deletions
diff --git a/‎requirements.txt
18 Bytes b/‎requirements.txt
18 Bytes
diff --git a/‎src/adjacentK.py
Lines changed: 113 additions & 0 deletions b/‎src/adjacentK.py
Lines changed: 113 additions & 0 deletions
diff --git a/‎src/postprocess.py
Lines changed: 35 additions & 10 deletions b/‎src/postprocess.py
Lines changed: 35 additions & 10 deletions
@@ -32,8 +32,9 @@ def main():
     configs = get_config()
 
     print_config(args, configs)
-    assert args.output.suffix in ['.xlsx', '.csv'], \
-        'Output file should be in xlsx or csv format. Directories are not supported.'
+
+    if os.path.isdir(args.output):
+        args.output = args.output / 'pypathomics-result.csv'
 
     if os.listdir(args.buffer) != 0:
         logging.warning(f'Buffer directory {args.buffer} is not empty, it may cause conflict. Continue? (y/N)')
@@ -48,13 +49,12 @@ def main():
     # Post-process features
     df_feats = postprocess_files(args, configs)
 
-    output_loc = args.output
     if args.output.suffix == '.xlsx':
-        df_feats.to_excel(output_loc, index=False)
+        df_feats.to_excel(args.output, index=False)
     elif args.output.suffix == '.csv':
-        df_feats.to_csv(output_loc, index=False)
+        df_feats.to_csv(args.output, index=False)
 
-    logging.info(f'Features saved to {output_loc}')
+    logging.info(f'Features saved to {args.output}')
 
 
 if __name__ == '__main__':
 
@@ -0,0 +1,113 @@
+import os.path
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+from scipy.spatial import cKDTree
+
+# Scale used to convert micrometres to pixels.
+res = 0.2201  # um per pixel
+# Neighbourhood radius, expressed in micrometres.
+radii_in_um = 32
+# Radii in pixels. Floor division by a float yields a float (here 145.0),
+# so the values in this list are floats, not ints.
+radii = [radii_in_um // res]
+
+# Largest x / y centroid coordinate recorded by getAllCoords.
+# [-1, -1] is the "not set yet" marker that getRelation asserts against.
+sample_size = [-1, -1]
+
+def getCoords(data: pd.DataFrame):
+    """Split the textual ``Centroid`` column into x and y arrays.
+
+    Each entry is a string whose first and last characters are delimiters
+    and whose interior holds comma-separated numbers; the first number is
+    taken as x and the second as y.
+
+    Args:
+        data: Frame that provides a ``Centroid`` column of strings.
+
+    Returns:
+        Tuple ``(xs, ys)`` of numpy arrays, one element per row of ``data``.
+    """
+    def _component(position):
+        # Drop the enclosing delimiters, split on commas and convert the
+        # requested field through np.float32.
+        return data['Centroid'].apply(
+            lambda text: np.float32(text[1:-1].split(',')[position])
+        )
+
+    return np.array(_component(0)), np.array(_component(1))
+
+
+def getAllCoords(root, slide_name):
+    """Load the centroid coordinates of every cell type of one slide.
+
+    Reads ``{slide_name}_Feats_{cell_type}.csv`` from ``root`` for the cell
+    types 'I', 'S' and 'T'. As a side effect, the module-level
+    ``sample_size`` list is raised to the largest x / y coordinate seen.
+
+    Args:
+        root: Directory containing the per-cell-type feature CSV files.
+        slide_name: File-name prefix that identifies the slide.
+
+    Returns:
+        dict mapping each cell type to an ``(xs, ys)`` tuple of numpy arrays.
+    """
+    cell_coords = {}
+    for cell_type in ['I', 'S', 'T']:
+        csv_file = os.path.join(root, f'{slide_name}_Feats_{cell_type}.csv')
+        xs, ys = getCoords(pd.read_csv(csv_file))
+        cell_coords[cell_type] = (xs, ys)
+        if len(xs) == 0:
+            # A cell type without any rows has no extent; taking the maximum
+            # of an empty array would raise, so leave sample_size untouched.
+            continue
+        sample_size[0] = max(sample_size[0], xs.max())
+        sample_size[1] = max(sample_size[1], ys.max())
+    return cell_coords
+
+
+def getRelation(coords:dict, cell_types:list):
+    """Average number of type-A cells found around each type-B cell.
+
+    A KD-tree is built on the centroids of ``cell_types[0]`` (type A) and
+    queried with a Euclidean ball around every centroid of ``cell_types[1]``
+    (type B), once per entry of the module-level ``radii`` list.
+
+    Args:
+        coords: Mapping of cell type to ``(xs, ys)`` coordinate arrays.
+        cell_types: Two cell-type keys ``[type_a, type_b]``.
+
+    Returns:
+        list with one float per radius: neighbour count divided by the
+        number of type-B cells. Every entry is ``nan`` when there are no
+        type-B cells, because the average is undefined.
+
+    Raises:
+        AssertionError: If ``sample_size`` still holds its unset marker.
+        ValueError: If ``cell_types`` does not contain exactly two entries.
+    """
+    assert sample_size[0] != -1, 'sample_size should be set'
+    if len(cell_types) != 2:
+        raise ValueError('cell_types should be a list of 2')
+
+    type_a, type_b = cell_types[0], cell_types[1]
+    alpha_x, alpha_y = coords[type_a][0], coords[type_a][1]
+    beta_x, beta_y = coords[type_b][0], coords[type_b][1]
+
+    n_beta = len(beta_x)
+    if n_beta == 0:
+        # No query points: avoid dividing by zero.
+        return [float('nan')] * len(radii)
+
+    # The tree and the query points do not depend on the radius, so build
+    # them once instead of once per radius.
+    tree = cKDTree(np.column_stack([alpha_x, alpha_y]))
+    queries = np.column_stack([beta_x, beta_y])
+
+    # Only when both types are the same does every query point find itself
+    # in the tree; cross-type queries must not be reduced by one per point.
+    self_hits = n_beta if type_a == type_b else 0
+
+    k = []
+    for radius in radii:
+        neighbours = tree.query_ball_point(queries, radius, p=2, return_length=True)
+        counts = int(np.sum(neighbours)) - self_hits
+        k.append(counts / n_beta)
+    return k
+
+
+# Script entry point: loads the centroids of one slide, saves a scatter plot
+# of all cells and one heatmap per radius of the pairwise getRelation values.
+if __name__ == '__main__':
+    # allCoords = getAllCoords(r'C:\Users\Ed\Downloads\temp', '2023-31276')
+    slide_name = 'TCAM'  # '2023-31276'
+    # NOTE(review): the input directory is a hard-coded local Windows path.
+    allCoords = getAllCoords(r'C:\Users\Ed\Downloads\temp', slide_name)
+    # I_S = getRelation(allCoords, ['I', 'S'])
+
+    # One scatter layer per cell type: S in blue, I in green, T in red.
+    plt.figure(figsize=(24, 18))
+    plt.scatter(allCoords['S'][0], allCoords['S'][1], c='blue', alpha=0.5, s=1)
+    plt.scatter(allCoords['I'][0], allCoords['I'][1], c='green', alpha=0.5, s=1)
+    plt.scatter(allCoords['T'][0], allCoords['T'][1], c='red', alpha=0.5, s=1)
+
+    # NOTE(review): the font family is changed after the scatter figure was
+    # created - confirm it is applied to that figure as intended.
+    plt.rcParams['font.family'] = 'Times New Roman'
+
+    plt.xlabel('X Coordinates')
+    plt.ylabel('Y Coordinates')
+    plt.title('Scatter plot of Centroids')
+    # Flip the y axis; presumably to match image coordinates - TODO confirm.
+    plt.gca().invert_yaxis()
+    plt.savefig(f'{slide_name}_scatter.png')
+
+    print(f'Cell \t Cell \t RipleyK')
+    # print(f'I \t I \t {getRelation(allCoords, ["I"])}')
+    # print(f'S \t S \t {getRelation(allCoords, ["S"])}')
+    # print(f'T \t T \t {getRelation(allCoords, ["T"])}')
+    #
+    # print(f'I \t S \t {getRelation(allCoords, ["I", "S"])}')
+    # print(f'I \t T \t {getRelation(allCoords, ["I", "T"])}')
+    #
+    # print(f'S \t I \t {getRelation(allCoords, ["S", "I"])}')
+    # print(f'S \t T \t {getRelation(allCoords, ["S", "T"])}')
+    #
+    # print(f'T \t I \t {getRelation(allCoords, ["T", "I"])}')
+    # print(f'T \t S \t {getRelation(allCoords, ["T", "S"])}')
+
+    # matrix[i, j, r] holds getRelation for the pair
+    # (cell_types[i], cell_types[j]) at radii[r].
+    cell_types = ['I', 'S', 'T']
+    matrix = np.zeros((len(cell_types), len(cell_types), len(radii)))
+
+    for i, cell_type_a in enumerate(cell_types):
+        for j, cell_type_b in enumerate(cell_types):
+            matrix[i, j, :] = getRelation(allCoords, [cell_type_a, cell_type_b])
+            print(f'{cell_type_a} \t {cell_type_b} \t {matrix[i, j, :]}')
+
+    # One heatmap file per radius; the file name carries the pixel radius.
+    for idx, radius in enumerate(radii):
+        plt.figure(figsize=(6.5, 6))
+        sns.heatmap(matrix[:, :, idx], annot=True, fmt=".2f", xticklabels=cell_types, yticklabels=cell_types, cmap="seismic")
+        # NOTE(review): the title prints radii_in_um rather than the loop's
+        # radius; this is only unambiguous while radii has a single entry.
+        plt.title(f"Distribution of expectations ({radii_in_um} μm)")
+        plt.xlabel("Cell Type B (surroundings)")
+        plt.ylabel("Cell Type A (targets)")
+        plt.savefig(f'{slide_name}_heatmap_radius_{radius}.png')
+        plt.close()
+
+
+
+
+
@@ -1,12 +1,13 @@
 import logging
 import os.path
+from pathlib import Path
 
 import pandas as pd
 import numpy as np
 
-from scipy.spatial import Delaunay
+from scipy.spatial import Delaunay, cKDTree
 
-from src.utils import get_triangle_feature_df
+from src.utils import get_triangle_feature_df, get_coords
 
 
 class FeatureExtractor:
@@ -16,6 +17,13 @@ def __init__(self, slide, buffer_dir, feature_list, cell_types, statistic_types)
         self.feature_list = feature_list
         self.cell_types = cell_types
         self.statistic_types = statistic_types
+
+        # RipleyK's parameters
+        self.sample_size = [-1, -1]   # equal to the ROI's size
+        self.radii_in_um = 32
+        self.res = 0.2201  # um per pixel
+        self.radii = [self.radii_in_um // self.res]
+
         assert len(self.statistic_types) > 0, 'In config.yaml: static_types should not be empty!'
 
     def read_csv_for_type(self, cell_type):
@@ -92,23 +100,26 @@ def extract_triangle_features(self):
 
         return pd.DataFrame(triangle_feature, index=[0])
 
+
     def extract(self):
         if len(self.feature_list) == 0:
             raise ValueError('Feature list is empty! Check config file')
         elif len(self.cell_types) == 0:
             raise ValueError('Cell types list is empty! Check config file')
 
-        if 'Triangle' not in self.feature_list:
-            return self.extract_features()
-        elif 'Triangle' in self.feature_list and len(self.cell_types) == 1:
-            return self.extract_triangle_features()
-        elif 'Triangle' in self.feature_list and len(self.cell_types) >= 2:
-            return pd.concat([self.extract_features(), self.extract_triangle_features()], axis=1)
+        triangle_feature = pd.DataFrame()
+
+        if 'Triangle' in self.feature_list:
+            triangle_feature =  self.extract_triangle_features()
+
+        additional_features = triangle_feature
+
+        return pd.concat([self.extract_features(), additional_features], axis=1)
 
 
 # Core function
 def postprocess_files(args, configs):
-    process_queue = list(args.seg.glob(f'*.json')) + list(args.seg.glob(f'*.dat'))
+    process_queue = list(Path(args.seg).glob(f'*.json')) + list(Path(args.seg).glob(f'*.dat'))
     df_feats_list = []
     for i, slide in enumerate(process_queue):
         logging.info(f'Phase 2 Postprocessing \t {i + 1} / {len(process_queue)} \t {slide} ')
@@ -122,4 +133,18 @@ def postprocess_files(args, configs):
 
     df_feats = pd.concat(df_feats_list, ignore_index=True)
     cols = ['slide'] + [col for col in df_feats.columns if col != 'slide']
-    return df_feats[cols]
+    return df_feats[cols]
+
+
+if __name__ == '__main__':
+    # Stand-alone run of the post-processing phase with hand-written
+    # arguments and configuration instead of the command-line entry point.
+    from argparse import Namespace
+
+    configs = {
+        'cell-types': ['I', 'S', 'T'],
+        'statistic-types': ['basic'],
+        'feature-set': ['Morph', 'Texture', 'Triangle'],
+    }
+    args = Namespace(
+        seg=r'E:\hover-net-output\json',
+        buffer=r'E:\hover-net-output\feature',
+    )
+
+    df_feats = postprocess_files(args, configs)
+    df_feats.to_csv('ec-output.csv', index=False)