Skip to content

Commit

Permalink
Merge pull request #71 from ivadomed/jca/preproc-dilate
Browse files Browse the repository at this point in the history
Update pre-processing script to use new SCT dilate feature
  • Loading branch information
jcohenadad authored Mar 10, 2023
2 parents 54d0d00 + 515d35a commit 1409cb1
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 75 deletions.
10 changes: 5 additions & 5 deletions config/config_seg_lesion.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
{
"command": "train",
"gpu_ids": [4],
"path_output": "/home/GRAMES.POLYMTL.CA/p101317/data_nvme_p101317/model_seg_lesion_mp2rage_20230102_144854",
"path_output": "/home/GRAMES.POLYMTL.CA/p101317/data_nvme_p101317/model_seg_lesion_mp2rage_",
"model_name": "model_seg_lesion_mp2rage",
"debugging": true,
"log_file": "train.log",
"wandb": {
"wandb_api_key": "9095e2bc9e4ab445d478c9c8a81759ae908be8c6",
"project_name": "basel-mp2rage-lesion",
"group_name": "r20230127",
"run_name": "seed11",
"group_name": "r20230210",
"run_name": "seed01",
"log_grads_every": 100
},
"loader_parameters": {
"path_data": ["/home/GRAMES.POLYMTL.CA/p101317/data_nvme_p101317/data_seg_mp2rage_20230124_132415/data_processed_lesionseg"],
"path_data": ["/home/GRAMES.POLYMTL.CA/p101317/data_nvme_p101317/data_seg_mp2rage_20230210_144231/data_processed_lesionseg"],
"subject_selection": {"n": [], "metadata": [], "value": []},
"target_suffix": ["_lesion-manualNeuroPoly"],
"extensions": [".nii.gz"],
Expand All @@ -37,7 +37,7 @@
},
"split_dataset": {
"fname_split": null,
"random_seed": 11,
"random_seed": 1,
"split_method" : "participant_id",
"data_testing": {"data_type": null, "data_value":[]},
"balance": null,
Expand Down
70 changes: 10 additions & 60 deletions preprocessing/preprocess_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ cd ${SUBJECT}/anat

# Define variables
file="${SUBJECT}_UNIT1"
file_gt1="${SUBJECT}_UNIT1_lesion-manualNeuroPoly"

# Make sure the image metadata is a valid JSON object
if [[ ! -s ${file}.json ]]; then
Expand All @@ -119,65 +120,23 @@ fi
segment_if_does_not_exist ${file} t1 svm
file_seg="${FILESEG}"

# Dilate spinal cord mask
sct_maths -i ${file_seg}.nii.gz -dilate 2 -shape ball -o ${file_seg}_dilate.nii.gz
sct_maths -i ${file_seg}_dilate.nii.gz -dilate 32 -dim 1 -shape disk -o ${file_seg}_dilate.nii.gz

# Use dilated mask to crop the original image and manual MS segmentations
sct_crop_image -i ${file}.nii.gz -m ${file_seg}_dilate.nii.gz -o ${file}_crop.nii.gz
# Use mask to crop the original image
DILATE="32x3x32"
sct_crop_image -i ${file}.nii.gz -m ${file_seg}.nii.gz -dilate ${DILATE} -o ${file}_crop.nii.gz

# Go to subject folder for segmentation GTs
cd $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat
# Crop the manual MS lesion segmentation
sct_crop_image -i $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat/${file_gt1}.nii.gz -m ${file_seg}.nii.gz -dilate ${DILATE} -o $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat/${file_gt1}_crop.nii.gz

# Define variables
file_gt1="${SUBJECT}_UNIT1_lesion-manualNeuroPoly"
# Until we figure out a way to include more ground truths, we only use the manualHaris segmentation.
# In the future, manualNeuroPoly should be used if it exists.
#file_gt2="${SUBJECT}_UNIT1_lesion-manual2"
#file_gtc="${SUBJECT}_UNIT1_lesion-manual-majvote"
#file_soft="${SUBJECT}_UNIT1_lesion-manual-soft"
# 'c' stands for the consensus GT

# Redefine variable for final SC segmentation mask as path changed
file_seg_dil=${PATH_DATA_PROCESSED}/${SUBJECT}/anat/${file_seg}_dilate

# Make sure the first rater metadata is a valid JSON object
if [[ ! -s ${file_gt1}.json ]]; then
echo "{}" >> ${file_gt1}.json
# Make sure a JSON file is present; if not, create an empty one
if [[ ! -s $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat/${file_gt1}.json ]]; then
echo "{}" >> $PATH_DATA_PROCESSED/derivatives/labels/$SUBJECT/anat/${file_gt1}.json
fi

# Aggregate multiple raters if second rater is present
#if [[ -f ${file_gt2}.nii.gz ]]; then
# Make sure the second rater metadata is a valid JSON object
# if [[ ! -s ${file_gt2}.json ]]; then
# echo "{}" >> ${file_gt2}.json
# fi
# Create consensus ground truth by majority vote
# sct_maths -i ${file_gt1}.nii.gz -add ${file_gt2}.nii.gz -o lesion_sum.nii.gz
# sct_maths -i lesion_sum.nii.gz -sub 1 -o lesion_sum_minusone.nii.gz
# binarize: everything that is 0.5 or below becomes 0.
# sct_maths -i lesion_sum_minusone.nii.gz -thr 0.5 -o ${file_gtc}.nii.gz

# Create soft ground truth by averaging all raters
# sct_maths -i lesion_sum.nii.gz -div 2 -o ${file_soft}.nii.gz

# Crop the manual segs
# sct_crop_image -i ${file_gt2}.nii.gz -m ${file_seg_dil}.nii.gz -o ${file_gt2}_crop.nii.gz
# sct_crop_image -i ${file_gtc}.nii.gz -m ${file_seg_dil}.nii.gz -o ${file_gtc}_crop.nii.gz
# sct_crop_image -i ${file_soft}.nii.gz -m ${file_seg_dil}.nii.gz -o ${file_soft}_crop.nii.gz
#fi

# Crop the manual seg
sct_crop_image -i ${file_gt1}.nii.gz -m ${file_seg_dil}.nii.gz -o ${file_gt1}_crop.nii.gz

# Go back to the root output path
cd $PATH_OUTPUT

# Create clean data processed folders for two tasks: spinal cord (SC) segmentation and lesion segmentation
PATH_DATA_PROCESSED_SCSEG="${PATH_DATA_PROCESSED}_scseg"
PATH_DATA_PROCESSED_LESIONSEG="${PATH_DATA_PROCESSED}_lesionseg"

# Copy over required BIDs files to both folders
# Copy over required BIDS files to both folders
mkdir -p $PATH_DATA_PROCESSED_SCSEG $PATH_DATA_PROCESSED_SCSEG/${SUBJECT} $PATH_DATA_PROCESSED_SCSEG/${SUBJECT}/anat
mkdir -p $PATH_DATA_PROCESSED_LESIONSEG $PATH_DATA_PROCESSED_LESIONSEG/${SUBJECT} $PATH_DATA_PROCESSED_LESIONSEG/${SUBJECT}/anat
rsync -avzh $PATH_DATA_PROCESSED/dataset_description.json $PATH_DATA_PROCESSED_SCSEG/
Expand Down Expand Up @@ -208,15 +167,6 @@ rsync -avzh $PATH_DATA_PROCESSED/${SUBJECT}/anat/${file}.json $PATH_DATA_PROCESS
mkdir -p $PATH_DATA_PROCESSED_LESIONSEG/derivatives $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT} $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/
rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt1}_crop.nii.gz $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_gt1}.nii.gz
rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt1}.json $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_gt1}.json
# If second rater is present, copy the other files
#if [[ -f ${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat/${file_gt2}.nii.gz ]]; then
# Copy the second rater GT and aggregated GTs if second rater is present
# rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt2}_crop.nii.gz $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_gt2}.nii.gz
# rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gt2}.json $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_gt2}.json
# rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_gtc}_crop.nii.gz $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_gtc}.nii.gz
# rsync -avzh $PATH_DATA_PROCESSED/derivatives/labels/${SUBJECT}/anat/${file_soft}_crop.nii.gz $PATH_DATA_PROCESSED_LESIONSEG/derivatives/labels/${SUBJECT}/anat/${file_soft}.nii.gz
#fi


# Display useful info for the log
end=`date +%s`
Expand Down
19 changes: 9 additions & 10 deletions preprocessing/qc_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

# Log problematic subjects for QC
failed_crop_subjects, shape_mismatch_subjects, left_out_lesion_subjects = [], [], []
no_groundtruth = []

# Perform QC on each subject
for subject in tqdm(subjects, desc='Iterating over Subjects'):
Expand All @@ -60,30 +61,28 @@
resolutions.append(resolution)

# Read original and cropped subject ground-truths (GT)
gt1_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manual.nii.gz' % subject)
gt1_crop_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manual_crop.nii.gz' % subject)
gt2_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manual2.nii.gz' % subject)
gt2_crop_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manual2_crop.nii.gz' % subject)
gt1_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manualNeuroPoly.nii.gz' % subject)
if not os.path.exists(gt1_fpath):
no_groundtruth.append(subject)
continue
gt1_crop_fpath = os.path.join(subject_labels_path, '%s_UNIT1_lesion-manualNeuroPoly_crop.nii.gz' % subject)

gt1 = nib.load(gt1_fpath)
gt1_crop = nib.load(gt1_crop_fpath)
gt2 = nib.load(gt2_fpath)
gt2_crop = nib.load(gt2_crop_fpath)

# Basic shape checks
if not img_crop.shape == gt1_crop.shape == gt2_crop.shape:
if not img_crop.shape == gt1_crop.shape:
shape_mismatch_subjects.append(subject)
continue

# Check if the dilated SC mask leaves out any lesions from GTs (from each rater)
if not (np.allclose(np.sum(gt1.get_fdata()), np.sum(gt1_crop.get_fdata())) and
np.allclose(np.sum(gt2.get_fdata()), np.sum(gt2_crop.get_fdata()))):
if not (np.allclose(np.sum(gt1.get_fdata()), np.sum(gt1_crop.get_fdata()))):
left_out_lesion_subjects.append(subject)

print('RESOLUTIONS: ', Counter(resolutions))
print('SIZES: ', Counter(sizes))
print('CROP SIZES: ', Counter(crop_sizes))

print('List of missing ground truth: ', no_groundtruth)
print('Could not find cropped image for the following subjects: ', failed_crop_subjects)
print('Found shape mismatch in images and GTs for the following subjects: ', shape_mismatch_subjects)
print('ALERT: Lesion(s) from raters cropped during preprocessing for the following subjects: ', left_out_lesion_subjects)

0 comments on commit 1409cb1

Please sign in to comment.