
Sm dev #62


Open. Wants to merge 47 commits into base: main.

Commits (47)
e2c4f4b
AZ segmentation
SarahMuth Oct 23, 2024
a0f713f
updates
SarahMuth Oct 28, 2024
94f9121
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 28, 2024
ac1ac00
update 2D DA
SarahMuth Oct 28, 2024
37de75d
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 29, 2024
61c57fa
small updates, compartment segmentation
SarahMuth Nov 7, 2024
40e965e
Implement code for first analysis
constantinpape Nov 7, 2024
7be9ee8
2D seg with mask
SarahMuth Nov 11, 2024
b1bef7e
Merge branch 'analysis' of https://github.com/computational-cell-anal…
SarahMuth Nov 11, 2024
f85e445
spatial distribution analysis
SarahMuth Nov 11, 2024
8ef16bc
intersection between compartment boundary and AZ segmentation
SarahMuth Nov 12, 2024
e625ef7
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Nov 12, 2024
09f6c84
Update compartment postprocessing
constantinpape Nov 12, 2024
d7dbb39
Merge branch 'more-comp-seg-updates' of https://github.com/computatio…
SarahMuth Nov 12, 2024
f893d23
updating data analysis on smaller details
SarahMuth Nov 13, 2024
08c56b9
minor updates data analysis
SarahMuth Nov 13, 2024
36d834f
Implement inner ear analysis WIP
constantinpape Nov 14, 2024
49d1b7c
calculation of AZ area
SarahMuth Nov 14, 2024
8a515d1
corrected radius factor
SarahMuth Nov 14, 2024
0f40d3c
Update inner ear analysis
constantinpape Nov 15, 2024
ad4741b
Update inner ear analysis
constantinpape Nov 17, 2024
305a80b
Updates to inner ear training and eval
constantinpape Nov 17, 2024
903e59e
Update inner ear analysis
constantinpape Nov 18, 2024
b1449d2
minor changes
SarahMuth Nov 19, 2024
0b7884d
Merge branch 'main' of https://github.com/computational-cell-analytic…
constantinpape Nov 19, 2024
186c92d
Update inner ear analysis scripts
constantinpape Nov 20, 2024
186df5b
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
constantinpape Nov 20, 2024
2ccf340
Add script to extract vesicle diameters for inner ear data
constantinpape Nov 20, 2024
5feff6a
Update active zone analysis for SNAP/MUNC data
constantinpape Nov 21, 2024
9b8c7a2
Add more inner ear analysis code
constantinpape Nov 21, 2024
db89b44
evaluation of AZ seg
SarahMuth Nov 23, 2024
51165a5
Fix issues with the segmentation export to IMOD
constantinpape Nov 23, 2024
aa5d78e
clean up
SarahMuth Nov 23, 2024
20e429b
clean up
SarahMuth Nov 23, 2024
19f618e
clean up
SarahMuth Nov 23, 2024
cb693b1
Update data summaries
constantinpape Nov 24, 2024
a0c31a8
Fix issue in data aggregation
constantinpape Nov 24, 2024
93a66c1
Update data summary
constantinpape Nov 24, 2024
e0dfda6
Merge branch 'main' into more-inner-ear-analysis
constantinpape Nov 24, 2024
59a38db
Update all measurements for the inner ear analysis
constantinpape Nov 24, 2024
9728951
Update vesicle diameter analysis
constantinpape Nov 24, 2024
84d3ec7
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
SarahMuth Nov 25, 2024
622da1e
update AZ evaluation
SarahMuth Nov 27, 2024
686b018
erosion dilation filtering of AZ
SarahMuth Nov 28, 2024
6b54e4a
stuff for revision
SarahMuth Mar 31, 2025
7d675ab
everything after 1st revision relating to training, inference, postpr…
SarahMuth May 22, 2025
f052b98
minor things
SarahMuth May 25, 2025
.gitignore (7 changes: 6 additions & 1 deletion)
@@ -8,6 +8,11 @@ models/*/
 run_sbatch.sbatch
 slurm/
 scripts/cooper/evaluation_results/
+analysis_results/
 scripts/cooper/training/copy_testset.py
 scripts/rizzoli/upsample_data.py
-scripts/cooper/training/find_rec_testset.py
+scripts/cooper/training/find_rec_testset.py
+scripts/rizzoli/combine_2D_slices.py
+scripts/rizzoli/combine_2D_slices_raw.py
+scripts/cooper/remove_h5key.py
+scripts/cooper/analysis/calc_AZ_area.py
big_to_small_pixel_size.py (87 changes: 87 additions & 0 deletions, new file)
@@ -0,0 +1,87 @@
import os
import numpy as np
import h5py
from glob import glob
from scipy.ndimage import zoom
from scipy.ndimage import label
from skimage.morphology import closing, ball

# Input and output folders
input_folder = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/AZ_data_after1stRevision/recorrected_length_of_AZ/wichmann_withAZ"
output_folder = "/mnt/lustre-emmy-hdd/usr/u12095/synaptic_reconstruction/AZ_data_after1stRevision/recorrected_length_of_AZ/wichmann_withAZ_rescaled_tomograms"
os.makedirs(output_folder, exist_ok=True)

# Define scaling factors
old_pixel_size = np.array([1.75, 1.75, 1.75])
new_pixel_size = np.array([1.55, 1.55, 1.55])
scaling_factors = old_pixel_size / new_pixel_size

# Utility function to process segmentation
def rescale_and_fix_segmentation(segmentation, scaling_factors):
    """
    Rescale the segmentation and ensure labels are preserved.
    Args:
        segmentation (numpy.ndarray): The input segmentation array with integer labels.
        scaling_factors (list or array): Scaling factors for each axis.
    Returns:
        numpy.ndarray: Rescaled and hole-free segmentation with preserved labels.
    """
    # Rescale segmentation using nearest-neighbor interpolation
    rescaled_segmentation = zoom(segmentation, scaling_factors, order=0)

    # Initialize an array to hold the processed segmentation
    processed_segmentation = np.zeros_like(rescaled_segmentation)

    # Ensure no holes for each label
    unique_labels = np.unique(rescaled_segmentation)
    for label_id in unique_labels:
        if label_id == 0:  # Skip the background
            continue

        # Extract binary mask for the current label
        label_mask = rescaled_segmentation == label_id

        # Apply morphological closing to fill holes
        closed_mask = closing(label_mask, ball(1))

        # Add the processed label back to the output segmentation
        processed_segmentation[closed_mask] = label_id

    return processed_segmentation.astype(segmentation.dtype)


# Get all .h5 files in the specified input folder
h5_files = glob(os.path.join(input_folder, "*.h5"))
existing_files = {os.path.basename(f) for f in glob(os.path.join(output_folder, "*.h5"))}

for h5_file in h5_files:
    print(f"Processing {h5_file}...")

    if os.path.basename(h5_file) in existing_files:
        print(f"Skipping {h5_file} as it already exists in the output folder.")
        continue

    # Read data from the .h5 file
    with h5py.File(h5_file, "r") as f:
        raw = f["raw"][:]  # Assuming the dataset is named "raw"
        az = f["labels/az"][:]

    print(f"Original shape - raw: {raw.shape}; az: {az.shape}")

    # Process raw data (tomogram) with linear interpolation
    print("Rescaling raw data...")
    rescaled_raw = zoom(raw, scaling_factors, order=1)

    # Process az segmentation
    print("Rescaling and fixing az segmentation...")
    rescaled_az = rescale_and_fix_segmentation(az, scaling_factors)

    # Save the processed data to a new .h5 file
    output_path = os.path.join(output_folder, os.path.basename(h5_file))
    with h5py.File(output_path, "w") as f:
        f.create_dataset("raw", data=rescaled_raw, compression="gzip")
        f.create_dataset("labels/az", data=rescaled_az, compression="gzip")

    print(f"Saved rescaled data to {output_path}")

print("Processing complete. Rescaled files are saved in:", output_folder)
scripts/aggregate_data_information.py (122 changes: 76 additions & 46 deletions)
@@ -12,55 +12,64 @@
stem = "STEM"


def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions):
def aggregate_vesicle_train_data(roots, conditions, resolutions):
tomo_names = []
tomo_vesicles = []
tomo_vesicles_all, tomo_vesicles_imod = [], []
tomo_condition = []
tomo_resolution = []
tomo_train = []

for ds, root in roots.items():
print("Aggregate data for", ds)
train_root = root["train"]
if train_root == "":
test_root = root["test"]
tomograms = sorted(glob(os.path.join(test_root, "2024**", "*.h5"), recursive=True))
this_test_tomograms = [os.path.basename(tomo) for tomo in tomograms]
def aggregate_split(ds, split_root, split):
if ds.startswith("04"):
tomograms = sorted(glob(os.path.join(split_root, "2024**", "*.h5"), recursive=True))
else:
# This is only the case for 04, which is also nested
tomograms = sorted(glob(os.path.join(train_root, "*.h5")))
this_test_tomograms = test_tomograms[ds]
tomograms = sorted(glob(os.path.join(split_root, "*.h5")))

assert len(tomograms) > 0, ds
this_condition = conditions[ds]
this_resolution = resolutions[ds][0]

for tomo_path in tqdm(tomograms):
for tomo_path in tqdm(tomograms, desc=f"Aggregate {split}"):
fname = os.path.basename(tomo_path)
with h5py.File(tomo_path, "r") as f:
try:
tomo_name = f.attrs["filename"]
except KeyError:
tomo_name = fname

n_label_sets = len(f["labels"])
if n_label_sets > 2:
print(tomo_path, "contains the following labels:", list(f["labels"].keys()))
seg = f["labels/vesicles"][:]
n_vesicles = len(np.unique(seg)) - 1
if "labels/vesicles/combined_vesicles" in f:
all_vesicles = f["labels/vesicles/combined_vesicles"][:]
imod_vesicles = f["labels/vesicles/masked_vesicles"][:]
n_vesicles_all = len(np.unique(all_vesicles)) - 1
n_vesicles_imod = len(np.unique(imod_vesicles)) - 2
else:
vesicles = f["labels/vesicles"][:]
n_vesicles_all = len(np.unique(vesicles)) - 1
n_vesicles_imod = n_vesicles_all

tomo_names.append(tomo_name)
tomo_vesicles.append(n_vesicles)
tomo_vesicles_all.append(n_vesicles_all)
tomo_vesicles_imod.append(n_vesicles_imod)
tomo_condition.append(this_condition)
tomo_resolution.append(this_resolution)
tomo_train.append("test" if fname in this_test_tomograms else "train/val")
tomo_train.append(split)

for ds, root in roots.items():
print("Aggregate data for", ds)
train_root = root["train"]
if train_root != "":
aggregate_split(ds, train_root, "train/val")
test_root = root["test"]
if test_root != "":
aggregate_split(ds, test_root, "test")

df = pd.DataFrame({
"tomogram": tomo_names,
"condition": tomo_condition,
"resolution": tomo_resolution,
"used_for": tomo_train,
"vesicle_count": tomo_vesicles,
"vesicle_count_all": tomo_vesicles_all,
"vesicle_count_imod": tomo_vesicles_imod,
})

os.makedirs("data_summary", exist_ok=True)
@@ -70,60 +79,47 @@ def aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
 def vesicle_train_data():
     roots = {
         "01": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/01_hoi_maus_2020_incomplete", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/01_hoi_maus_2020_incomplete", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/01_hoi_maus_2020_incomplete", # noqa
         },
         "02": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/02_hcc_nanogold", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/02_hcc_nanogold", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/02_hcc_nanogold", # noqa
         },
         "03": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/03_hog_cs1sy7", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/03_hog_cs1sy7", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/03_hog_cs1sy7", # noqa
         },
         "04": {
             "train": "",
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/ground_truth/04Dataset_for_vesicle_eval/", # noqa
         },
         "05": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/05_stem750_sv_training", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/05_stem750_sv_training", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/05_stem750_sv_training", # noqa
         },
         "07": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/07_hoi_s1sy7_tem250_ihgp", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/07_hoi_s1sy7_tem250_ihgp", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/07_hoi_s1sy7_tem250_ihgp", # noqa
         },
         "09": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/09_stem750_66k", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/09_stem750_66k", # noqa
             "test": "",
         },
         "10": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/10_tem_single_release", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/10_tem_single_release", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/10_tem_single_release", # noqa
         },
         "11": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/11_tem_multiple_release", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/11_tem_multiple_release", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/11_tem_multiple_release", # noqa
         },
         "12": {
-            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/extracted/20240909_cp_datatransfer/12_chemical_fix_cryopreparation", # noqa
+            "train": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/12_chemical_fix_cryopreparation", # noqa
             "test": "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/vesicles_processed_v2/testsets/12_chemical_fix_cryopreparation", # noqa
         },
     }
 
-    test_tomograms = {
-        "01": ["tomogram-009.h5", "tomogram-038.h5", "tomogram-049.h5", "tomogram-052.h5", "tomogram-057.h5", "tomogram-060.h5", "tomogram-067.h5", "tomogram-074.h5", "tomogram-076.h5", "tomogram-083.h5", "tomogram-133.h5", "tomogram-136.h5", "tomogram-145.h5", "tomogram-149.h5", "tomogram-150.h5"], # noqa
-        "02": ["tomogram-004.h5", "tomogram-008.h5"],
-        "03": ["tomogram-003.h5", "tomogram-004.h5", "tomogram-008.h5",],
-        "04": [], # all used for test
-        "05": ["tomogram-003.h5", "tomogram-005.h5",],
-        "07": ["tomogram-006.h5", "tomogram-017.h5",],
-        "09": [], # no test data
-        "10": ["tomogram-001.h5", "tomogram-002.h5", "tomogram-007.h5"],
-        "11": ["tomogram-001.h5 tomogram-007.h5 tomogram-008.h5"],
-        "12": ["tomogram-004.h5", "tomogram-021.h5", "tomogram-022.h5",],
-    }
-
     conditions = {
         "01": single_ax_tem,
         "02": dual_ax_tem,
@@ -150,7 +146,7 @@ def vesicle_train_data():
"12": (1.554, 1.554, 1.554)
}

aggregate_vesicle_train_data(roots, test_tomograms, conditions, resolutions)
aggregate_vesicle_train_data(roots, conditions, resolutions)


def aggregate_az_train_data(roots, test_tomograms, conditions, resolutions):
@@ -397,6 +393,11 @@ def vesicle_domain_adaptation_data():
"MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
"MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
"MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
],
"frog": [
"block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
"block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
"block30UB_one.h5", "block10U3A_five.h5",
]
}

@@ -439,13 +440,42 @@ def vesicle_domain_adaptation_data():
     aggregate_da(roots, train_tomograms, test_tomograms, resolutions)
 
 
+def get_n_images_frog():
+    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/rizzoli/extracted/upsampled_by2"
+    tomos = ["block10U3A_three.h5", "block30UB_one_two.h5", "block30UB_two.h5", "block10U3A_one.h5",
+             "block184B_one.h5", "block30UB_three.h5", "block10U3A_two.h5", "block30UB_four.h5",
+             "block30UB_one.h5", "block10U3A_five.h5"]
+
+    n_images = 0
+    for tomo in tomos:
+        path = os.path.join(root, tomo)
+        with h5py.File(path, "r") as f:
+            n_images += f["raw"].shape[0]
+    print(n_images)
+
+
+def get_image_sizes_tem_2d():
+    root = "/mnt/lustre-emmy-hdd/projects/nim00007/data/synaptic-reconstruction/cooper/2D_data/maus_2020_tem2d_wt_unt_div14_exported_scaled/good_for_DAtraining/maus_2020_tem2d_wt_unt_div14_exported_scaled" # noqa
+    tomos = [
+        "MF_05649_P-09175-E_06.h5", "MF_05646_C-09175-B_001B.h5", "MF_05649_P-09175-E_07.h5",
+        "MF_05649_G-09175-C_001.h5", "MF_05646_C-09175-B_002.h5", "MF_05649_G-09175-C_04.h5",
+        "MF_05649_P-09175-E_05.h5", "MF_05646_C-09175-B_000.h5", "MF_05646_C-09175-B_001.h5"
+    ]
+    for tomo in tomos:
+        path = os.path.join(root, tomo)
+        with h5py.File(path, "r") as f:
+            print(f["raw"].shape)
+
+
 def main():
     # active_zone_train_data()
     # compartment_train_data()
     # mito_train_data()
-    # vesicle_train_data()
+    vesicle_train_data()
 
-    vesicle_domain_adaptation_data()
+    # vesicle_domain_adaptation_data()
+    # get_n_images_frog()
+    # get_image_sizes_tem_2d()
 
 
 main()
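The refactor in this file replaces the hard-coded test_tomograms lists with per-split aggregation over separate train and test roots, so the train/val vs. test assignment is derived from the directory layout. A minimal sketch of that bookkeeping pattern, using hypothetical paths rather than the cluster layout above:

import os
from glob import glob

import pandas as pd

# Hypothetical per-dataset layout: each dataset has a train and a test folder,
# and either may be "" when that split does not exist (cf. datasets 04 and 09).
roots = {
    "01": {"train": "/data/01/train", "test": "/data/01/test"},
    "09": {"train": "/data/09/train", "test": ""},  # no test data
}

records = []
for ds, root in roots.items():
    for key, split in (("train", "train/val"), ("test", "test")):
        split_root = root[key]
        if split_root == "":
            continue
        # Every file found under a split root is tagged with that split.
        for path in sorted(glob(os.path.join(split_root, "*.h5"))):
            records.append({"dataset": ds, "tomogram": os.path.basename(path), "used_for": split})

df = pd.DataFrame(records)
print(df.groupby(["dataset", "used_for"]).size())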