Skip to content

Commit 44c3826

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents ef5b0c8 + 2249f9a commit 44c3826

File tree

6 files changed

+432
-23
lines changed

6 files changed

+432
-23
lines changed

main.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ def main():
3232
configs = get_config()
3333

3434
print_config(args, configs)
35-
assert args.output.suffix in ['.xlsx', '.csv'], \
36-
'Output file should be in xlsx or csv format. Directories are not supported.'
35+
36+
if os.path.isdir(args.output):
37+
args.output = args.output / 'pypathomics-result.csv'
3738

3839
if os.listdir(args.buffer) != 0:
3940
logging.warning(f'Buffer directory {args.buffer} is not empty, it may cause conflict. Continue? (y/N)')
@@ -48,13 +49,12 @@ def main():
4849
# Post-process features
4950
df_feats = postprocess_files(args, configs)
5051

51-
output_loc = args.output
5252
if args.output.suffix == '.xlsx':
53-
df_feats.to_excel(output_loc, index=False)
53+
df_feats.to_excel(args.output, index=False)
5454
elif args.output.suffix == '.csv':
55-
df_feats.to_csv(output_loc, index=False)
55+
df_feats.to_csv(args.output, index=False)
5656

57-
logging.info(f'Features saved to {output_loc}')
57+
logging.info(f'Features saved to {args.output}')
5858

5959

6060
if __name__ == '__main__':

requirements.txt

18 Bytes
Binary file not shown.

src/adjacentK.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import os.path
2+
3+
import numpy as np
4+
import pandas as pd
5+
import matplotlib.pyplot as plt
6+
import seaborn as sns
7+
8+
from scipy.spatial import cKDTree
9+
10+
# Slide resolution, in micrometres per pixel.
res = 0.2201
# Neighbourhood radius, in micrometres.
radii_in_um = 32
# Neighbourhood radii converted to pixels. Floor division on a float keeps
# the value a float with no fractional part.
radii = [radii_in_um // res]

# Largest x / y centroid coordinate seen so far; updated in place by
# getAllCoords. [-1, -1] means "not populated yet".
sample_size = [-1, -1]
15+
16+
def getCoords(data: pd.DataFrame):
    """Split the ``Centroid`` column of *data* into x and y coordinate arrays.

    Every entry of ``data['Centroid']`` must be a string made of one opening
    character, two comma-separated numbers and one closing character, for
    example ``"[12.5, 7.0]"``. The first and last characters are discarded
    and the remainder is split on commas.

    Args:
        data: Table with a ``Centroid`` column of such strings.

    Returns:
        ``(xs, ys)``: two 1-D ``float32`` arrays with one element per row.
        Both arrays are empty (still ``float32``) when *data* has no rows.

    Raises:
        KeyError: If *data* has no ``Centroid`` column.
        ValueError: If a coordinate cannot be parsed as a number.
    """
    # Strip first so stray whitespace around the brackets does not end up
    # inside the slice; float() itself tolerates the space after the comma.
    pairs = [cell.strip()[1:-1].split(',') for cell in data['Centroid']]

    # Passing dtype explicitly keeps the result float32 even with zero rows.
    xs = np.array([float(pair[0]) for pair in pairs], dtype=np.float32)
    ys = np.array([float(pair[1]) for pair in pairs], dtype=np.float32)
    return xs, ys
23+
24+
25+
def getAllCoords(root, slide_name):
    """Load the centroid coordinates of every cell type of one slide.

    For each of the cell types ``'I'``, ``'S'`` and ``'T'`` the file
    ``<root>/<slide_name>_Feats_<cell_type>.csv`` is read and its centroids
    are parsed with ``getCoords``.

    Side effect: the module-level ``sample_size`` list is updated in place so
    that it holds the largest x and y coordinate seen so far. It is never
    reset here, so the value accumulates over successive calls.

    Args:
        root: Directory containing the per-cell-type CSV files.
        slide_name: Slide identifier used as the file-name prefix.

    Returns:
        Dict mapping each cell type to its ``(xs, ys)`` coordinate arrays.
    """
    cell_coords = {}
    for cell_type in ('I', 'S', 'T'):
        csv_file = os.path.join(root, f'{slide_name}_Feats_{cell_type}.csv')
        xs, ys = getCoords(pd.read_csv(csv_file))
        cell_coords[cell_type] = (xs, ys)

        # A cell type with no detections has no maximum; skip it rather than
        # letting max() raise on an empty sequence.
        if len(xs) > 0:
            sample_size[0] = max(sample_size[0], float(np.max(xs)))
            sample_size[1] = max(sample_size[1], float(np.max(ys)))
    return cell_coords
34+
35+
36+
def getRelation(coords: dict, cell_types: list, search_radii=None):
    """Average neighbour count between two cell types, one value per radius.

    A KD-tree is built from the points of ``cell_types[0]``. For every point
    of ``cell_types[1]`` the tree points within ``radius`` (Euclidean
    distance, inclusive) are counted, and the total is divided by the number
    of ``cell_types[1]`` points.

    When both entries name the same cell type, every query point finds itself
    in the tree, so one hit per query point is removed. For two different
    cell types nothing is removed, because the query point is not in the
    tree.

    The bounds in the module-level ``sample_size`` are not needed: only the
    disabled CSR normalisation (``bound_size * counts / n**2 - pi * r**2``)
    used them, so this function no longer requires ``getAllCoords`` to have
    populated them.

    Args:
        coords: Dict mapping a cell type to a ``(xs, ys)`` pair of coordinate
            sequences, as returned by ``getAllCoords``.
        cell_types: Exactly two cell-type keys of *coords*.
        search_radii: Radii to evaluate, in the same unit as the coordinates.
            Defaults to the module-level ``radii`` list.

    Returns:
        List of floats, one per radius. An entry is ``nan`` when
        ``cell_types[1]`` has no points, since a mean over zero points is
        undefined.

    Raises:
        ValueError: If *cell_types* does not contain exactly two entries.
        KeyError: If a cell type is missing from *coords*.
    """
    if len(cell_types) != 2:
        raise ValueError('cell_types should be a list of 2')
    if search_radii is None:
        search_radii = radii

    first, second = cell_types[0], cell_types[1]
    alpha = np.column_stack([
        np.asarray(coords[first][0], dtype=float),
        np.asarray(coords[first][1], dtype=float),
    ])
    beta = np.column_stack([
        np.asarray(coords[second][0], dtype=float),
        np.asarray(coords[second][1], dtype=float),
    ])
    same_type = first == second

    # The tree does not depend on the radius, so build it once.
    tree = cKDTree(alpha) if len(alpha) > 0 else None

    k = []
    for radius in search_radii:
        if len(beta) == 0:
            k.append(float('nan'))
            continue

        counts = 0
        if tree is not None:
            # One batched query for all points instead of a Python-level loop.
            hits = tree.query_ball_point(beta, radius, p=2)
            counts = sum(len(found) for found in hits)
            if same_type:
                # Drop the self-match of every query point.
                counts -= len(beta)

        k.append(counts / len(beta))
    return k
58+
59+
60+
if __name__ == '__main__':
    # Ad-hoc driver: plots the centroids of one slide and writes one heatmap
    # of pairwise neighbour counts per radius.
    slide_name = 'TCAM'  # '2023-31276'
    allCoords = getAllCoords(r'C:\Users\Ed\Downloads\temp', slide_name)

    # Scatter plot of all centroids, one colour per cell type.
    plt.figure(figsize=(24, 18))
    for cell_type, colour in (('S', 'blue'), ('I', 'green'), ('T', 'red')):
        plt.scatter(allCoords[cell_type][0], allCoords[cell_type][1], c=colour, alpha=0.5, s=1)

    plt.rcParams['font.family'] = 'Times New Roman'

    plt.xlabel('X Coordinates')
    plt.ylabel('Y Coordinates')
    plt.title('Scatter plot of Centroids')
    # Flip the y axis so the plot matches image (row-down) orientation.
    plt.gca().invert_yaxis()
    plt.savefig(f'{slide_name}_scatter.png')
    # Release the large scatter figure before the heatmaps are drawn.
    plt.close()

    print('Cell \t Cell \t RipleyK')

    cell_types = ['I', 'S', 'T']
    # matrix[i, j, r]: result of getRelation for (cell_types[i], cell_types[j])
    # at radii[r].
    matrix = np.zeros((len(cell_types), len(cell_types), len(radii)))

    for i, cell_type_a in enumerate(cell_types):
        for j, cell_type_b in enumerate(cell_types):
            matrix[i, j, :] = getRelation(allCoords, [cell_type_a, cell_type_b])
            print(f'{cell_type_a} \t {cell_type_b} \t {matrix[i, j, :]}')

    for idx, radius in enumerate(radii):
        plt.figure(figsize=(6.5, 6))
        sns.heatmap(matrix[:, :, idx], annot=True, fmt=".2f", xticklabels=cell_types, yticklabels=cell_types, cmap="seismic")
        plt.title(f"Distribution of expectations ({radii_in_um} μm)")
        plt.xlabel("Cell Type B (surroundings)")
        plt.ylabel("Cell Type A (targets)")
        plt.savefig(f'{slide_name}_heatmap_radius_{radius}.png')
        plt.close()
109+
110+
111+
112+
113+

src/postprocess.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import logging
22
import os.path
3+
from pathlib import Path
34

45
import pandas as pd
56
import numpy as np
67

7-
from scipy.spatial import Delaunay
8+
from scipy.spatial import Delaunay, cKDTree
89

9-
from src.utils import get_triangle_feature_df
10+
from src.utils import get_triangle_feature_df, get_coords
1011

1112

1213
class FeatureExtractor:
@@ -16,6 +17,13 @@ def __init__(self, slide, buffer_dir, feature_list, cell_types, statistic_types)
1617
self.feature_list = feature_list
1718
self.cell_types = cell_types
1819
self.statistic_types = statistic_types
20+
21+
# RipleyK's parameters
22+
self.sample_size = [-1, -1] # equal to the ROI's size
23+
self.radii_in_um = 32
24+
self.res = 0.2201 # um per pixel
25+
self.radii = [self.radii_in_um // self.res]
26+
1927
assert len(self.statistic_types) > 0, 'In config.yaml: static_types should not be empty!'
2028

2129
def read_csv_for_type(self, cell_type):
@@ -92,23 +100,26 @@ def extract_triangle_features(self):
92100

93101
return pd.DataFrame(triangle_feature, index=[0])
94102

103+
95104
def extract(self):
96105
if len(self.feature_list) == 0:
97106
raise ValueError('Feature list is empty! Check config file')
98107
elif len(self.cell_types) == 0:
99108
raise ValueError('Cell types list is empty! Check config file')
100109

101-
if 'Triangle' not in self.feature_list:
102-
return self.extract_features()
103-
elif 'Triangle' in self.feature_list and len(self.cell_types) == 1:
104-
return self.extract_triangle_features()
105-
elif 'Triangle' in self.feature_list and len(self.cell_types) >= 2:
106-
return pd.concat([self.extract_features(), self.extract_triangle_features()], axis=1)
110+
triangle_feature = pd.DataFrame()
111+
112+
if 'Triangle' in self.feature_list:
113+
triangle_feature = self.extract_triangle_features()
114+
115+
additional_features = triangle_feature
116+
117+
return pd.concat([self.extract_features(), additional_features], axis=1)
107118

108119

109120
# Core function
110121
def postprocess_files(args, configs):
111-
process_queue = list(args.seg.glob(f'*.json')) + list(args.seg.glob(f'*.dat'))
122+
process_queue = list(Path(args.seg).glob(f'*.json')) + list(Path(args.seg).glob(f'*.dat'))
112123
df_feats_list = []
113124
for i, slide in enumerate(process_queue):
114125
logging.info(f'Phase 2 Postprocessing \t {i + 1} / {len(process_queue)} \t {slide} ')
@@ -122,4 +133,18 @@ def postprocess_files(args, configs):
122133

123134
df_feats = pd.concat(df_feats_list, ignore_index=True)
124135
cols = ['slide'] + [col for col in df_feats.columns if col != 'slide']
125-
return df_feats[cols]
136+
return df_feats[cols]
137+
138+
139+
if __name__ == '__main__':
140+
from argparse import Namespace
141+
args, configs = Namespace(), {}
142+
args.seg = r'E:\hover-net-output\json'
143+
args.buffer = r'E:\hover-net-output\feature'
144+
configs['cell-types'] = ['I', 'S', 'T']
145+
configs['statistic-types'] = ['basic']
146+
configs['feature-set'] = ['Morph', 'Texture', 'Triangle']
147+
148+
df_feats = postprocess_files(args, configs)
149+
150+
df_feats.to_csv('ec-output.csv', index=False)

0 commit comments

Comments
 (0)