Skip to content

Sm dev #62

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 47 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
e2c4f4b
AZ segmentation
SarahMuth Oct 23, 2024
a0f713f
updates
SarahMuth Oct 28, 2024
94f9121
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 28, 2024
ac1ac00
update 2D DA
SarahMuth Oct 28, 2024
37de75d
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Oct 29, 2024
61c57fa
small updates, compartment segmentation
SarahMuth Nov 7, 2024
40e965e
Implement code for first analysis
constantinpape Nov 7, 2024
7be9ee8
2D seg with mask
SarahMuth Nov 11, 2024
b1bef7e
Merge branch 'analysis' of https://github.com/computational-cell-anal…
SarahMuth Nov 11, 2024
f85e445
spatial distribution analysis
SarahMuth Nov 11, 2024
8ef16bc
intersection between compartment boundary and AZ segmentation
SarahMuth Nov 12, 2024
e625ef7
Merge branch 'main' of https://github.com/computational-cell-analytic…
SarahMuth Nov 12, 2024
09f6c84
Update compartment postprocessing
constantinpape Nov 12, 2024
d7dbb39
Merge branch 'more-comp-seg-updates' of https://github.com/computatio…
SarahMuth Nov 12, 2024
f893d23
updating data analysis on smaller details
SarahMuth Nov 13, 2024
08c56b9
minor updates data analysis
SarahMuth Nov 13, 2024
36d834f
Implement inner ear analysis WIP
constantinpape Nov 14, 2024
49d1b7c
calculation of AZ area
SarahMuth Nov 14, 2024
8a515d1
corrected radius factor
SarahMuth Nov 14, 2024
0f40d3c
Update inner ear analysis
constantinpape Nov 15, 2024
ad4741b
Update inner ear analysis
constantinpape Nov 17, 2024
305a80b
Updates to inner ear training and eval
constantinpape Nov 17, 2024
903e59e
Update inner ear analysis
constantinpape Nov 18, 2024
b1449d2
minor changes
SarahMuth Nov 19, 2024
0b7884d
Merge branch 'main' of https://github.com/computational-cell-analytic…
constantinpape Nov 19, 2024
186c92d
Update inner ear analysis scripts
constantinpape Nov 20, 2024
186df5b
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
constantinpape Nov 20, 2024
2ccf340
Add script to extract vesicle diameters for inner ear data
constantinpape Nov 20, 2024
5feff6a
Update active zone analysis for SNAP/MUNC data
constantinpape Nov 21, 2024
9b8c7a2
Add more inner ear analysis code
constantinpape Nov 21, 2024
db89b44
evaluation of AZ seg
SarahMuth Nov 23, 2024
51165a5
Fix issues with the segmentation export to IMOD
constantinpape Nov 23, 2024
aa5d78e
clean up
SarahMuth Nov 23, 2024
20e429b
clean up
SarahMuth Nov 23, 2024
19f618e
clean up
SarahMuth Nov 23, 2024
cb693b1
Update data summaries
constantinpape Nov 24, 2024
a0c31a8
Fix issue in data aggregation
constantinpape Nov 24, 2024
93a66c1
Update data summary
constantinpape Nov 24, 2024
e0dfda6
Merge branch 'main' into more-inner-ear-analysis
constantinpape Nov 24, 2024
59a38db
Update all measurements for the inner ear analysis
constantinpape Nov 24, 2024
9728951
Update vesicle diameter analysis
constantinpape Nov 24, 2024
84d3ec7
Merge branch 'more-inner-ear-analysis' of https://github.com/computat…
SarahMuth Nov 25, 2024
622da1e
update AZ evaluation
SarahMuth Nov 27, 2024
686b018
erosion dilation filtering of AZ
SarahMuth Nov 28, 2024
6b54e4a
stuff for revision
SarahMuth Mar 31, 2025
7d675ab
everything after 1st revision relating to training, inference, postpr…
SarahMuth May 22, 2025
f052b98
minor things
SarahMuth May 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update inner ear analysis
  • Loading branch information
constantinpape committed Nov 17, 2024
commit ad4741b72b3a36041d341f55a6bf0269c20ed3d5
2 changes: 2 additions & 0 deletions scripts/inner_ear/analysis/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
panels/
*.zip
84 changes: 79 additions & 5 deletions scripts/inner_ear/analysis/analyze_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,91 @@


def for_tomos_with_annotation():
    """Compare distance measurements for tomograms that have manual annotations.

    Collects the per-vesicle distances to ribbon, presynaptic density (PD) and
    boundary for the manual, semi-automatic and fully automatic approaches,
    writes them to an Excel sheet, and plots per-pool histograms.
    """
    # Fixed: removed a stray `breakpoint()` debug call and a stale duplicate
    # 2-tuple unpacking left over from before the semi-automatic results existed.
    manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation()

    manual_distances = manual_assignments[
        ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"]
    ]
    manual_distances["approach"] = ["manual"] * len(manual_distances)

    semi_automatic_distances = semi_automatic_assignments[
        ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"]
    ]
    semi_automatic_distances["approach"] = ["semi_automatic"] * len(semi_automatic_distances)

    automatic_distances = automatic_assignments[
        ["pool", "ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"]
    ]
    automatic_distances["approach"] = ["automatic"] * len(automatic_distances)

    distances = pd.concat([manual_distances, semi_automatic_distances, automatic_distances])
    distances.to_excel("./results/distances_with_manual_annotations.xlsx", index=False)

    pools = pd.unique(distances["pool"])
    dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"]

    # One row per vesicle pool, one column per target structure.
    fig, axes = plt.subplots(3, 3)

    # multiple = "stack"
    multiple = "layer"

    structures = ["Ribbon", "PD", "Boundary"]
    for i, pool in enumerate(pools):
        pool_distances = distances[distances["pool"] == pool]
        for j, dist_col in enumerate(dist_cols):
            ax = axes[i, j]
            ax.set_title(f"{pool} to {structures[j]}")
            sns.histplot(
                data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax
            )
            ax.set_xlabel("distance [nm]")

    fig.tight_layout()
    plt.show()


def for_all_tomos():
    """Compare semi-automatic and fully automatic distances over all tomograms.

    Exports the combined distance table to Excel and shows per-pool histograms
    of the distances to ribbon, presynaptic density (PD) and boundary.
    """
    semi_automatic_assignments, automatic_assignments = get_all_measurements()

    dist_cols = ["ribbon_distance [nm]", "pd_distance [nm]", "boundary_distance [nm]"]

    # Build one long table with an "approach" column distinguishing the sources.
    frames = []
    for label, assignments in (
        ("semi_automatic", semi_automatic_assignments),
        ("automatic", automatic_assignments),
    ):
        frame = assignments[["pool"] + dist_cols]
        frame["approach"] = [label] * len(frame)
        frames.append(frame)
    distances = pd.concat(frames)
    distances.to_excel("./results/distances_all_tomograms.xlsx", index=False)

    pools = pd.unique(distances["pool"])
    structures = ["Ribbon", "PD", "Boundary"]

    # One subplot row per pool, one column per target structure.
    fig, axes = plt.subplots(3, 3)

    # multiple = "stack"
    multiple = "layer"

    for row, pool in enumerate(pools):
        pool_distances = distances[distances["pool"] == pool]
        for col, dist_col in enumerate(dist_cols):
            ax = axes[row, col]
            ax.set_title(f"{pool} to {structures[col]}")
            sns.histplot(
                data=pool_distances, x=dist_col, hue="approach", multiple=multiple, kde=False, ax=ax
            )
            ax.set_xlabel("distance [nm]")

    fig.tight_layout()
    plt.show()


def main():
    """Run both distance analyses: annotated tomograms and the full data set."""
    for_tomos_with_annotation()
    # Fixed: removed the stale commented-out call that duplicated the line below.
    for_all_tomos()


if __name__ == "__main__":
Expand Down
32 changes: 15 additions & 17 deletions scripts/inner_ear/analysis/analyze_vesicle_pools.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,63 +34,61 @@ def plot_pools(data, errors):
plt.show()


def for_tomos_with_annotation():
    """Summarize vesicle pool counts for tomograms with manual annotations.

    Computes per-pool mean and standard deviation of vesicle counts for the
    manual, semi-automatic and automatic approaches, plots them, and writes
    the averages and standard deviations to separate sheets of one Excel file.
    """
    manual_assignments, semi_automatic_assignments, automatic_assignments = get_measurements_with_annotation()

    manual_counts = manual_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
    semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
    automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)

    manual_stats = manual_counts.agg(["mean", "std"]).transpose().reset_index()
    semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index()
    automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index()

    # Fixed: the dict literals contained duplicate "Semi-automatic" and "Manual"
    # keys (diff residue); in Python the later key silently overwrites the
    # earlier one, so only one entry per approach is kept here.
    data = pd.DataFrame({
        "Pool": manual_stats["pool"],
        "Semi-automatic": semi_automatic_stats["mean"],
        "Automatic": automatic_stats["mean"],
        "Manual": manual_stats["mean"],
    })
    errors = pd.DataFrame({
        "Pool": manual_stats["pool"],
        "Semi-automatic": semi_automatic_stats["std"],
        "Automatic": automatic_stats["std"],
        "Manual": manual_stats["std"],
    })

    plot_pools(data, errors)

    # Fixed: removed the stale duplicate assignment of output_path.
    output_path = "./results/vesicle_pools_with_manual_annotations.xlsx"
    data.to_excel(output_path, index=False, sheet_name="Average")
    with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer:
        errors.to_excel(writer, sheet_name="StandardDeviation", index=False)


def for_all_tomos():
    """Summarize vesicle pool counts over all tomograms.

    Computes per-pool mean and standard deviation of vesicle counts for the
    semi-automatic and automatic approaches, plots them, and writes averages
    and standard deviations to separate sheets of one Excel file.
    """
    # Fixed: removed commented-out debug code and a stale 1-tuple unpacking
    # from before get_all_measurements returned the semi-automatic results.
    semi_automatic_assignments, automatic_assignments = get_all_measurements()

    automatic_counts = automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
    automatic_stats = automatic_counts.agg(["mean", "std"]).transpose().reset_index()

    semi_automatic_counts = semi_automatic_assignments.groupby(["tomogram", "pool"]).size().unstack(fill_value=0)
    semi_automatic_stats = semi_automatic_counts.agg(["mean", "std"]).transpose().reset_index()

    # Fixed: duplicate "Semi-automatic" keys (diff residue) removed — the
    # semi-automatic columns now come from semi_automatic_stats only.
    data = pd.DataFrame({
        "Pool": automatic_stats["pool"],
        "Semi-automatic": semi_automatic_stats["mean"],
        "Automatic": automatic_stats["mean"],
    })
    errors = pd.DataFrame({
        "Pool": automatic_stats["pool"],
        "Semi-automatic": semi_automatic_stats["std"],
        "Automatic": automatic_stats["std"],
    })

    plot_pools(data, errors)

    # Fixed: removed the stale duplicate assignment of output_path.
    output_path = "./results/vesicle_pools_all_tomograms.xlsx"
    data.to_excel(output_path, index=False, sheet_name="Average")
    with pd.ExcelWriter(output_path, engine="openpyxl", mode="a") as writer:
        errors.to_excel(writer, sheet_name="StandardDeviation", index=False)
Expand Down
69 changes: 69 additions & 0 deletions scripts/inner_ear/analysis/combine_fully_automatic_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import sys

import pandas as pd

sys.path.append("..")
sys.path.append("../processing")


def combine_fully_auto_results(table, data_root, output_path):
    """Combine fully automatic measurement results for all completed tomograms.

    Args:
        table: Overview table of tomogram folders (one row per tomogram,
            with a "Local Path" column and the matching validation columns).
        data_root: Root folder of the data; the validation table
            "Validierungs-Tabelle-v3.xlsx" is read from it.
        output_path: Path of the combined Excel output.
    """
    from combine_measurements import combine_results

    val_table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Validierungs-Tabelle-v3.xlsx")
    val_table = pd.read_excel(val_table_path)

    results = {}
    for _, row in table.iterrows():
        folder = row["Local Path"]
        if folder == "":
            continue

        # Match this row against the validation table and only keep tomograms
        # that are marked as fully processed ("Fertig!" == "ja").
        row_selection = (val_table.Bedingung == row.Bedingung) &\
            (val_table.Maus == row.Maus) &\
            (val_table["Ribbon-Orientierung"] == row["Ribbon-Orientierung"]) &\
            (val_table["OwnCloud-Unterordner"] == row["OwnCloud-Unterordner"])
        complete_vals = val_table[row_selection]["Fertig!"].values
        is_complete = (complete_vals == "ja").all()
        if not is_complete:
            continue

        micro = row["EM alt vs. Neu"]

        tomo_name = os.path.relpath(folder, os.path.join(data_root, "Electron-Microscopy-Susi/Analyse"))
        tab_name = "measurements_uncorrected_assignments.xlsx"
        # The correction folder is spelled inconsistently across the data set.
        res_path = os.path.join(folder, "korrektur", tab_name)
        if not os.path.exists(res_path):
            res_path = os.path.join(folder, "Korrektur", tab_name)
        assert os.path.exists(res_path), res_path
        results[tomo_name] = (res_path, "alt" if micro == "beides" else micro)

        if micro == "beides":
            # This tomogram was imaged with both microscopes; also collect the
            # measurements from the new-microscope subfolder.
            micro = "neu"

            new_root = os.path.join(folder, "neues EM")
            if not os.path.exists(new_root):
                new_root = os.path.join(folder, "Tomo neues EM")
            assert os.path.exists(new_root)

            res_path = os.path.join(new_root, "korrektur", "measurements.xlsx")
            if not os.path.exists(res_path):
                res_path = os.path.join(new_root, "Korrektur", "measurements.xlsx")
            assert os.path.exists(res_path), res_path
            # Fixed: the conditional `"alt" if micro == "beides" else micro` was
            # dead here — micro is always "neu" at this point.
            # NOTE(review): this overwrites the "alt" entry stored above under
            # the same tomo_name key — confirm whether the new-EM result should
            # use a distinct key instead.
            results[tomo_name] = (res_path, micro)

    combine_results(results, output_path, sheet_name="vesicles")


def main():
    """Parse the overview table and combine the fully automatic results."""
    from parse_table import parse_table, get_data_root

    data_root = get_data_root()
    table_path = os.path.join(data_root, "Electron-Microscopy-Susi", "Übersicht.xlsx")
    table = parse_table(table_path, data_root)

    res_path = "../results/fully_automatic_analysis_results.xlsx"
    combine_fully_auto_results(table, data_root, output_path=res_path)


# Fixed: guard the entry point so importing this module does not trigger the
# full analysis; consistent with the other analysis scripts.
if __name__ == "__main__":
    main()
40 changes: 34 additions & 6 deletions scripts/inner_ear/analysis/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import sys

import pandas as pd

sys.path.append("../processing")
Expand All @@ -13,23 +14,35 @@ def get_manual_assignments():
return results


def get_semi_automatic_assignments(tomograms):
    """Load the semi-automatic analysis results, restricted to the given tomograms.

    Args:
        tomograms: Iterable of tomogram names to keep.

    Returns:
        The filtered results as a pandas DataFrame.
    """
    # Fixed: removed the stale duplicate def header (old function name) that
    # was left above this definition as diff residue.
    result_path = "../results/20240917_1/automatic_analysis_results.xlsx"
    results = pd.read_excel(result_path)
    results = results[results["tomogram"].isin(tomograms)]
    return results


def get_automatic_assignments(tomograms):
    """Load the fully automatic analysis results, restricted to the given tomograms.

    Args:
        tomograms: Iterable of tomogram names to keep.

    Returns:
        The filtered results as a pandas DataFrame.
    """
    result_path = "../results/fully_automatic_analysis_results.xlsx"
    frame = pd.read_excel(result_path)
    keep = frame["tomogram"].isin(tomograms)
    return frame[keep]


def get_measurements_with_annotation():
    """Load the measurements for all tomograms that have manual annotations.

    Restricts the manual assignments to tomograms that also have
    semi-automatic results and checks that the manual, semi-automatic and
    automatic tables cover the same set of tomograms.

    Returns:
        A tuple (manual_assignments, semi_automatic_assignments,
        automatic_assignments) of pandas DataFrames.
    """
    # Fixed: removed interleaved stale statements (diff residue) that
    # re-derived the tomogram list from the old 2-tuple return value.
    manual_assignments = get_manual_assignments()
    manual_tomograms = pd.unique(manual_assignments["tomogram"])
    semi_automatic_assignments = get_semi_automatic_assignments(manual_tomograms)

    tomograms = pd.unique(semi_automatic_assignments["tomogram"])
    manual_assignments = manual_assignments[manual_assignments["tomogram"].isin(tomograms)]
    assert len(pd.unique(manual_assignments["tomogram"])) == len(pd.unique(semi_automatic_assignments["tomogram"]))

    automatic_assignments = get_automatic_assignments(tomograms)
    filtered_tomograms = pd.unique(manual_assignments["tomogram"])
    assert len(filtered_tomograms) == len(pd.unique(automatic_assignments["tomogram"]))

    print("Tomograms with manual annotations:", len(filtered_tomograms))
    return manual_assignments, semi_automatic_assignments, automatic_assignments


def get_all_measurements():
Expand All @@ -39,6 +52,7 @@ def get_all_measurements():

val_table = val_table[val_table["Kommentar 27-10-24"] == "passt"]
n_tomos = len(val_table)
print("All tomograms:", n_tomos)
assert n_tomos > 0
tomo_names = []
for _, row in val_table.iterrows():
Expand All @@ -49,5 +63,19 @@ def get_all_measurements():
)
tomo_names.append(name)

semi_automatic_assignments = get_semi_automatic_assignments(tomo_names)
filtered_tomo_names = pd.unique(semi_automatic_assignments["tomogram"]).tolist()

automatic_assignments = get_automatic_assignments(tomo_names)
return automatic_assignments
assert len(filtered_tomo_names) == len(pd.unique(automatic_assignments["tomogram"]))

return semi_automatic_assignments, automatic_assignments


def main():
    """Smoke-check both loaders: annotated tomograms and the full data set."""
    get_measurements_with_annotation()
    get_all_measurements()


if __name__ == "__main__":
    main()