Skip to content

Commit c5c9710

Browse files
committed
Add fractal and RipleyK feature
1 parent ca38256 commit c5c9710

File tree

3 files changed

+179
-1
lines changed

3 files changed

+179
-1
lines changed

src/adjacentK.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import os.path
2+
3+
import ripleyk
4+
import numpy as np
5+
import pandas as pd
6+
import matplotlib.pyplot as plt
7+
import seaborn as sns
8+
9+
from scipy.spatial import cKDTree
10+
11+
res = 0.2201 # um per pixel
12+
radii_in_um = 32
13+
radii = [radii_in_um // res]
14+
15+
sample_size = [-1, -1]
16+
17+
def getCoords(data:pd.DataFrame):
18+
centroid = data['Centroid']
19+
centroid = centroid.apply(lambda x: x[1:-1].split(','))
20+
21+
xs = centroid.apply(lambda x: np.float32(x[0]))
22+
ys = centroid.apply(lambda x: np.float32(x[1]))
23+
return np.array(xs), np.array(ys)
24+
25+
26+
def getAllCoords(root, slide_name):
27+
cell_coords = {}
28+
for cell_type in ['I', 'S', 'T']:
29+
csv_file = os.path.join(root, f'{slide_name}_Feats_{cell_type}.csv')
30+
df = pd.read_csv(csv_file)
31+
xs, ys = getCoords(df)
32+
cell_coords[cell_type] = (xs, ys)
33+
sample_size[0], sample_size[1] = max(sample_size[0], max(xs)), max(sample_size[1], max(ys))
34+
return cell_coords
35+
36+
37+
def getRelation(coords:dict, cell_types:list):
38+
assert sample_size[0] != -1, 'sample_size should be set'
39+
k = []
40+
if len(cell_types) == 2:
41+
for radius in radii:
42+
counts = 0
43+
score_vol = np.pi * radius**2
44+
bound_size = sample_size[0] * sample_size[1]
45+
alpha_x, alpha_y = coords[cell_types[0]][0], coords[cell_types[0]][1]
46+
beta_x, beta_y = coords[cell_types[1]][0], coords[cell_types[1]][1]
47+
tree = cKDTree(np.array([alpha_x, alpha_y]).T)
48+
for x, y in zip(beta_x, beta_y):
49+
# boundary_correct = False
50+
counts += len(tree.query_ball_point([x, y], radius, p=2))-1
51+
# CSR_Normalise
52+
# k_value = bound_size * counts / len(beta_x)**2 - score_vol
53+
# estimation
54+
k_value = counts / len(beta_x)
55+
k.append(k_value)
56+
else:
57+
raise ValueError('cell_types should be a list of 2')
58+
return k
59+
60+
61+
if __name__ == '__main__':
62+
# allCoords = getAllCoords(r'C:\Users\Ed\Downloads\temp', '2023-31276')
63+
slide_name = 'TCAM' # '2023-31276'
64+
allCoords = getAllCoords(r'C:\Users\Ed\Downloads\temp', slide_name)
65+
# I_S = getRelation(allCoords, ['I', 'S'])
66+
67+
plt.figure(figsize=(24, 18))
68+
plt.scatter(allCoords['S'][0], allCoords['S'][1], c='blue', alpha=0.5, s=1)
69+
plt.scatter(allCoords['I'][0], allCoords['I'][1], c='green', alpha=0.5, s=1)
70+
plt.scatter(allCoords['T'][0], allCoords['T'][1], c='red', alpha=0.5, s=1)
71+
72+
plt.rcParams['font.family'] = 'Times New Roman'
73+
74+
plt.xlabel('X Coordinates')
75+
plt.ylabel('Y Coordinates')
76+
plt.title('Scatter plot of Centroids')
77+
plt.gca().invert_yaxis()
78+
plt.savefig(f'{slide_name}_scatter.png')
79+
80+
print(f'Cell \t Cell \t RipleyK')
81+
# print(f'I \t I \t {getRelation(allCoords, ["I"])}')
82+
# print(f'S \t S \t {getRelation(allCoords, ["S"])}')
83+
# print(f'T \t T \t {getRelation(allCoords, ["T"])}')
84+
#
85+
# print(f'I \t S \t {getRelation(allCoords, ["I", "S"])}')
86+
# print(f'I \t T \t {getRelation(allCoords, ["I", "T"])}')
87+
#
88+
# print(f'S \t I \t {getRelation(allCoords, ["S", "I"])}')
89+
# print(f'S \t T \t {getRelation(allCoords, ["S", "T"])}')
90+
#
91+
# print(f'T \t I \t {getRelation(allCoords, ["T", "I"])}')
92+
# print(f'T \t S \t {getRelation(allCoords, ["T", "S"])}')
93+
94+
cell_types = ['I', 'S', 'T']
95+
matrix = np.zeros((len(cell_types), len(cell_types), len(radii)))
96+
97+
for i, cell_type_a in enumerate(cell_types):
98+
for j, cell_type_b in enumerate(cell_types):
99+
matrix[i, j, :] = getRelation(allCoords, [cell_type_a, cell_type_b])
100+
print(f'{cell_type_a} \t {cell_type_b} \t {matrix[i, j, :]}')
101+
102+
for idx, radius in enumerate(radii):
103+
plt.figure(figsize=(6.5, 6))
104+
sns.heatmap(matrix[:, :, idx], annot=True, fmt=".2f", xticklabels=cell_types, yticklabels=cell_types, cmap="seismic")
105+
plt.title(f"Distribution of expectations ({radii_in_um} μm)")
106+
plt.xlabel("Cell Type B (surroundings)")
107+
plt.ylabel("Cell Type A (targets)")
108+
plt.savefig(f'{slide_name}_heatmap_radius_{radius}.png')
109+
plt.close()
110+
111+
112+
113+
114+

src/preprocess.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,58 @@ def worker_initializer():
3636
signal.signal(signal.SIGINT, signal.SIG_IGN)
3737

3838

39+
def _fractal_dimension(Z):
40+
"""
41+
Calculate the fractal dimension of an object (boundary complexity).
42+
43+
Source: https://gist.github.com/rougier/e5eafc276a4e54f516ed5559df4242c0
44+
45+
From https://en.wikipedia.org/wiki/Minkowski–Bouligand_dimension ...
46+
In fractal geometry, the Minkowski–Bouligand dimension, also known as
47+
Minkowski dimension or box-counting dimension, is a way of determining the
48+
fractal dimension of a set S in a Euclidean space Rn, or more generally in
49+
a metric space (X, d).
50+
51+
"""
52+
# Only for 2d binary image
53+
assert len(Z.shape) == 2
54+
Z = Z > 0
55+
56+
# From https://github.com/rougier/numpy-100 (#87)
57+
def boxcount(arr, k):
58+
S = np.add.reduceat(
59+
np.add.reduceat(arr, np.arange(0, arr.shape[0], k), axis=0),
60+
np.arange(0, arr.shape[1], k),
61+
axis=1)
62+
# We count non-empty (0) and non-full boxes (k*k)
63+
return len(np.where((S > 0) & (S < k * k))[0])
64+
65+
# Minimal dimension of image
66+
p = min(Z.shape)
67+
68+
# Greatest power of 2 less than or equal to p
69+
n = 2 ** np.floor(np.log(p) / np.log(2))
70+
71+
# Extract the exponent
72+
n = int(np.log(n) / np.log(2))
73+
74+
# Build successive box sizes (from 2**n down to 2**1)
75+
sizes = 2 ** np.arange(n, 1, -1)
76+
77+
# Actual box counting with decreasing size
78+
counts = []
79+
for size in sizes:
80+
counts.append(boxcount(Z, size))
81+
82+
# Fit the successive log(sizes) with log (counts)
83+
coeffs = [0]
84+
if len(counts):
85+
try:
86+
coeffs = np.polyfit(np.log(sizes), np.log(counts), 1)
87+
except TypeError:
88+
pass
89+
return -coeffs[0]
90+
3991
def getRegionPropFromContour(contour, bbox, extention=2):
4092
(left, top), (right, bottom) = bbox
4193
height, width = bottom - top, right - left
@@ -48,6 +100,7 @@ def getRegionPropFromContour(contour, bbox, extention=2):
48100
contour[:, 1] = contour[:, 1] - top + extention
49101
cv2.drawContours(image, [contour], 0, 1, -1)
50102
regionProp = regionprops(image)[0]
103+
regionProp.fractal_dim = _fractal_dimension(image)
51104
return regionProp
52105

53106

@@ -121,6 +174,7 @@ def SingleMorphFeatures(args):
121174
featuresDict['CurvStd'] += [curvStd]
122175
featuresDict['CurvMax'] += [curvMax]
123176
featuresDict['CurvMin'] += [curvMin]
177+
featuresDict['FractalDim'] += [regionProps.fractal_dim]
124178

125179
return featuresDict
126180

@@ -388,3 +442,7 @@ def run_wsi(args, configs):
388442
logging.info(f'Phase 1 Preprocessing \t 1 / 1 \t {args.seg} ')
389443
process(args.seg, args.wsi, args.buffer, args.level, configs['feature-set'], configs['cell-types'])
390444

445+
446+
if __name__ == '__main__':
447+
process(seg_path=r'C:\Users\Ed\Downloads\WSI_json_biopsy_resection_a\TCAM.json', wsi_path=r'C:\Users\Ed\Downloads\WSI_json_biopsy_resection_a\TCAM.ndpi', output_path=r'C:\Users\Ed\Downloads\temp', level=0, feature_set=['Morph'], cell_types=['I', 'S', 'T'])
448+
process(seg_path=r'C:\Users\Ed\Downloads\WSI_json_biopsy_resection_a\2023-31276.json', wsi_path=r'C:\Users\Ed\Downloads\WSI_json_biopsy_resection_a\2023-31276.svs', output_path=r'C:\Users\Ed\Downloads\temp', level=0, feature_set=['Morph'], cell_types=['I', 'S', 'T'])

src/utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import os.path
23

34
import yaml
45
import pandas as pd
@@ -32,7 +33,12 @@ def read_yaml(file_path):
3233

3334

3435
def get_config():
35-
return read_yaml('config.yaml')
36+
if os.path.exists('config.yaml'):
37+
return read_yaml('config.yaml')
38+
elif os.path.exists('../config.yaml'):
39+
return read_yaml('../config.yaml')
40+
else:
41+
raise FileNotFoundError("Configuration file not found: config.yaml")
3642

3743

3844
# for triangle features

0 commit comments

Comments
 (0)