Binary file not shown.
@@ -16,10 +16,10 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.149881Z",
"iopub.status.busy": "2024-02-13T16:50:33.149461Z",
"iopub.status.idle": "2024-02-13T16:50:33.787799Z",
"shell.execute_reply": "2024-02-13T16:50:33.787164Z"
"iopub.execute_input": "2024-02-15T02:41:40.916400Z",
"iopub.status.busy": "2024-02-15T02:41:40.916035Z",
"iopub.status.idle": "2024-02-15T02:41:41.540995Z",
"shell.execute_reply": "2024-02-15T02:41:41.540291Z"
},
"jukit_cell_id": "551uyQMpHm"
},
@@ -50,10 +50,10 @@
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.791419Z",
"iopub.status.busy": "2024-02-13T16:50:33.790943Z",
"iopub.status.idle": "2024-02-13T16:50:33.795904Z",
"shell.execute_reply": "2024-02-13T16:50:33.795140Z"
"iopub.execute_input": "2024-02-15T02:41:41.544680Z",
"iopub.status.busy": "2024-02-15T02:41:41.544333Z",
"iopub.status.idle": "2024-02-15T02:41:41.549278Z",
"shell.execute_reply": "2024-02-15T02:41:41.548630Z"
},
"jukit_cell_id": "2h8CHkah8U"
},
@@ -91,10 +91,10 @@
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.800109Z",
"iopub.status.busy": "2024-02-13T16:50:33.799251Z",
"iopub.status.idle": "2024-02-13T16:50:33.817524Z",
"shell.execute_reply": "2024-02-13T16:50:33.816303Z"
"iopub.execute_input": "2024-02-15T02:41:41.552089Z",
"iopub.status.busy": "2024-02-15T02:41:41.551720Z",
"iopub.status.idle": "2024-02-15T02:41:41.569857Z",
"shell.execute_reply": "2024-02-15T02:41:41.569152Z"
},
"jukit_cell_id": "zDRghRntC5"
},
@@ -111,14 +111,14 @@
"# Path to the plate probability data\n",
"proba_path = pathlib.Path(f\"{big_drive_path}/class_balanced_log_reg_probability_sc_data\")\n",
"\n",
"# Path to the platemap\n",
"bar_plate_path = f\"{ref_path}/barcode_platemap.csv\"\n",
"\n",
"# Paths of each plate file\n",
"proba_plate_paths = proba_path.glob(\"*.parquet\")\n",
"\n",
"# Define barcode platemap dataframe\n",
"barcode_platemapdf = pd.read_csv(bar_plate_path)\n",
"barcode_platemapdf = pd.read_csv(f\"{ref_path}/barcode_platemap.csv\")\n",
"\n",
"# Define experiment metadata dataframe\n",
"exmetadf = pd.read_csv(f\"{ref_path}/experiment-metadata.tsv\", sep=\"\\t\")\n",
"\n",
"# Metadata and platemap paths and the name of the treatment_columns for each treatment type\n",
"treatment_data = {\n",
@@ -154,10 +154,10 @@
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.822937Z",
"iopub.status.busy": "2024-02-13T16:50:33.822451Z",
"iopub.status.idle": "2024-02-13T16:50:33.826910Z",
"shell.execute_reply": "2024-02-13T16:50:33.826184Z"
"iopub.execute_input": "2024-02-15T02:41:41.573813Z",
"iopub.status.busy": "2024-02-15T02:41:41.573429Z",
"iopub.status.idle": "2024-02-15T02:41:41.577151Z",
"shell.execute_reply": "2024-02-15T02:41:41.576572Z"
},
"jukit_cell_id": "bvzjj5dqSv"
},
@@ -182,10 +182,10 @@
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.829875Z",
"iopub.status.busy": "2024-02-13T16:50:33.829563Z",
"iopub.status.idle": "2024-02-13T16:50:33.833903Z",
"shell.execute_reply": "2024-02-13T16:50:33.833213Z"
"iopub.execute_input": "2024-02-15T02:41:41.580001Z",
"iopub.status.busy": "2024-02-15T02:41:41.579503Z",
"iopub.status.idle": "2024-02-15T02:41:41.583510Z",
"shell.execute_reply": "2024-02-15T02:41:41.582894Z"
},
"jukit_cell_id": "nSE3TR3l3H"
},
@@ -218,6 +218,32 @@
"# Process the data"
]
},
{
"cell_type": "markdown",
"metadata": {
"jukit_cell_id": "g3ZLCQHWe0"
},
"source": [
"## Combine barcode platemap and experiment metadata"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-15T02:41:41.586598Z",
"iopub.status.busy": "2024-02-15T02:41:41.586224Z",
"iopub.status.idle": "2024-02-15T02:41:41.593473Z",
"shell.execute_reply": "2024-02-15T02:41:41.592850Z"
},
"jukit_cell_id": "uHP6MeePWG"
},
"outputs": [],
"source": [
"barcode_platemapdf = pd.merge(barcode_platemapdf, exmetadf, how=\"inner\", on=[\"Assay_Plate_Barcode\", \"Plate_Map_Name\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -229,13 +255,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.836837Z",
"iopub.status.busy": "2024-02-13T16:50:33.836481Z",
"iopub.status.idle": "2024-02-13T16:50:33.842210Z",
"shell.execute_reply": "2024-02-13T16:50:33.841571Z"
"iopub.execute_input": "2024-02-15T02:41:41.596349Z",
"iopub.status.busy": "2024-02-15T02:41:41.596036Z",
"iopub.status.idle": "2024-02-15T02:41:41.603030Z",
"shell.execute_reply": "2024-02-15T02:41:41.602399Z"
},
"jukit_cell_id": "JZE2tY7CHu"
},
@@ -288,13 +314,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.845051Z",
"iopub.status.busy": "2024-02-13T16:50:33.844733Z",
"iopub.status.idle": "2024-02-13T16:50:33.849386Z",
"shell.execute_reply": "2024-02-13T16:50:33.848746Z"
"iopub.execute_input": "2024-02-15T02:41:41.605904Z",
"iopub.status.busy": "2024-02-15T02:41:41.605440Z",
"iopub.status.idle": "2024-02-15T02:41:41.609808Z",
"shell.execute_reply": "2024-02-15T02:41:41.609243Z"
},
"jukit_cell_id": "8a6YT36HEf"
},
@@ -317,13 +343,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.852375Z",
"iopub.status.busy": "2024-02-13T16:50:33.851961Z",
"iopub.status.idle": "2024-02-13T16:50:33.855510Z",
"shell.execute_reply": "2024-02-13T16:50:33.854938Z"
"iopub.execute_input": "2024-02-15T02:41:41.612693Z",
"iopub.status.busy": "2024-02-15T02:41:41.612165Z",
"iopub.status.idle": "2024-02-15T02:41:41.615241Z",
"shell.execute_reply": "2024-02-15T02:41:41.614736Z"
},
"jukit_cell_id": "19p1I4wojL"
},
@@ -348,20 +374,23 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.858312Z",
"iopub.status.busy": "2024-02-13T16:50:33.857881Z",
"iopub.status.idle": "2024-02-13T17:41:23.799885Z",
"shell.execute_reply": "2024-02-13T17:41:23.799345Z"
"iopub.execute_input": "2024-02-15T02:41:41.617660Z",
"iopub.status.busy": "2024-02-15T02:41:41.617365Z",
"iopub.status.idle": "2024-02-15T04:27:43.938994Z",
"shell.execute_reply": "2024-02-15T04:27:43.938561Z"
},
"jukit_cell_id": "9pgP4YdWo4"
},
"outputs": [],
"source": [
"# Define columns to group by\n",
"filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well']\n",
"filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well', 'Cell_type']\n",
"\n",
"# Columns of interest which should also be tracked\n",
"tracked_cols = [\"Time\"]\n",
"\n",
"# Store phenotype column names\n",
"phenotype_cols = None\n",
@@ -386,7 +415,8 @@
" common_broaddf.loc[common_broaddf[\"control_type\"] != \"negcon\"],\n",
" common_broaddf.loc[common_broaddf[\"control_type\"] == \"negcon\"],\n",
" phenotype_cols,\n",
" filt_cols\n",
" filt_cols,\n",
" tracked_cols\n",
" )\n",
"\n",
" # Define the comparisons data structure for the first time\n",
@@ -410,13 +440,13 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T17:41:23.803502Z",
"iopub.status.busy": "2024-02-13T17:41:23.803140Z",
"iopub.status.idle": "2024-02-13T17:41:24.546385Z",
"shell.execute_reply": "2024-02-13T17:41:24.545800Z"
"iopub.execute_input": "2024-02-15T04:27:43.941786Z",
"iopub.status.busy": "2024-02-15T04:27:43.941273Z",
"iopub.status.idle": "2024-02-15T04:27:44.735628Z",
"shell.execute_reply": "2024-02-15T04:27:44.734984Z"
},
"jukit_cell_id": "KcqfcwXHtJ"
},
@@ -58,14 +58,14 @@
# Path to the plate probability data
proba_path = pathlib.Path(f"{big_drive_path}/class_balanced_log_reg_probability_sc_data")

# Path to the platemap
bar_plate_path = f"{ref_path}/barcode_platemap.csv"

# Paths of each plate file
proba_plate_paths = proba_path.glob("*.parquet")

# Define barcode platemap dataframe
barcode_platemapdf = pd.read_csv(bar_plate_path)
barcode_platemapdf = pd.read_csv(f"{ref_path}/barcode_platemap.csv")

# Define experiment metadata dataframe
exmetadf = pd.read_csv(f"{ref_path}/experiment-metadata.tsv", sep="\t")

# Metadata and platemap paths and the name of the treatment_columns for each treatment type
treatment_data = {
@@ -122,11 +122,19 @@ def perform_ks_test(_dmso_probs, _treatment_probs):
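# The body of perform_ks_test is collapsed in this view. As a hypothetical sketch only
# (not part of this diff and not necessarily the repository's implementation), a
# two-sample Kolmogorov-Smirnov comparison of DMSO versus treatment phenotype
# probabilities could be written with scipy like this:
from scipy import stats

def ks_test_sketch(_dmso_probs, _treatment_probs):
    # Compare the distributions of predicted phenotype probabilities between
    # negative-control (DMSO) cells and treated cells; return the KS statistic
    # and the p-value of the two-sample test.
    result = stats.ks_2samp(_dmso_probs, _treatment_probs)
    return result.statistic, result.pvalue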

# # Process the data

# ## Combine the model probability and plate data
# ## Combine barcode platemap and experiment metadata

# In[6]:


barcode_platemapdf = pd.merge(barcode_platemapdf, exmetadf, how="inner", on=["Assay_Plate_Barcode", "Plate_Map_Name"])
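# A hypothetical miniature example of what the inner merge above does (plate and map
# names below are invented; only the join columns, Cell_type, and Time appear in this
# change): plates present in both tables keep their row and gain the experiment
# metadata columns, while unmatched plates are dropped.
import pandas as pd

_bar_example = pd.DataFrame({
    "Assay_Plate_Barcode": ["plate_A", "plate_B"],
    "Plate_Map_Name": ["map_1", "map_1"],
})
_meta_example = pd.DataFrame({
    "Assay_Plate_Barcode": ["plate_A"],
    "Plate_Map_Name": ["map_1"],
    "Cell_type": ["cell_line_X"],
    "Time": [48],
})

# Only plate_A survives the inner join; it carries Cell_type and Time forward,
# which the downstream grouping (filt_cols) and tracking (tracked_cols) rely on.
_merged_example = pd.merge(
    _bar_example, _meta_example, how="inner", on=["Assay_Plate_Barcode", "Plate_Map_Name"]
)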


# ## Combine the model probability and plate data

# In[7]:


def combine_meta(probadf):
"""
Parameters
@@ -172,7 +180,7 @@ def combine_meta(probadf):
return common_broaddf


# In[7]:
# In[8]:


# Fill blank broad samples in the broad_sample column with DMSO.
@@ -183,7 +191,7 @@ def combine_meta(probadf):

# ## Defining tests and aggregation metric names

# In[8]:
# In[9]:


# Create a dictionary where the keys represent the name of the comparison or test, and the values are dictionaries
@@ -196,11 +204,14 @@ def combine_meta(probadf):

# ## Compare treatments and negative controls

# In[9]:
# In[10]:


# Define columns to group by
filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well']
filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well', 'Cell_type']

# Columns of interest which should also be tracked
tracked_cols = ["Time"]

# Store phenotype column names
phenotype_cols = None
@@ -225,7 +236,8 @@ def combine_meta(probadf):
common_broaddf.loc[common_broaddf["control_type"] != "negcon"],
common_broaddf.loc[common_broaddf["control_type"] == "negcon"],
phenotype_cols,
filt_cols
filt_cols,
tracked_cols
)

# Define the comparisons data structure for the first time
@@ -240,7 +252,7 @@ def combine_meta(probadf):

# ## Save the output of the treatment

# In[10]:
# In[11]:


comparisons = pd.DataFrame(comparisons)
12 changes: 10 additions & 2 deletions 3.analyze_data/utils/well_significance_testing.py
@@ -96,7 +96,7 @@ def samp_well(_well_samp):

return _welldf.groupby('Metadata_Well', group_keys=False).apply(samp_well)

def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_cols, _filt_cols, _control_cutoff = 50, _treat_cutoff = 50):
def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_cols, _filt_cols, _tracked_cols = [], _control_cutoff = 50, _treat_cutoff = 50):
"""
This function is intended to preprocess the predicted MitoCheck phenotype probability data prior to comparing the phenotype predicted probabilities.
Please refer to the README for additional information on how the treatment and control groups are compared.
@@ -120,6 +120,9 @@ def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_co
_filt_cols: List
The names of the columns to group the treatment cells by before comparing the probabilities.

_tracked_cols: List
(Optional) The names of the columns to be tracked in the final output in addition to _filt_cols and _phenotype_cols.

_control_cutoff: Integer
(Optional default=50) The minimum number of cells required for a negative control well to be included in the comparison.

@@ -140,9 +143,14 @@ def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_co
# The columns for keeping track of metadata and filtering the negative control cells
ref_cols = dict(zip(_filt_cols, filt_col_vals))

# Include other columns of interest not directly used for comparing probabilities if specified
if _tracked_cols:
ref_cols = {**ref_cols, **dict(zip(_tracked_cols, group_treatdf[_tracked_cols].iloc[0]))}

# The negative control cells
group_negdf = _negcondf.loc[(_negcondf["Metadata_Plate"] == ref_cols["Metadata_Plate"]) &
(_negcondf["Metadata_model_type"] == ref_cols["Metadata_model_type"])
(_negcondf["Metadata_model_type"] == ref_cols["Metadata_model_type"]) &
(_negcondf["Cell_type"] == ref_cols["Cell_type"])
]

# Remove wells if the cell count is below the corresponding threshold
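For orientation, the sketch below shows, under hypothetical data, how the new _tracked_cols argument is intended to flow from the notebook call (filt_cols plus tracked_cols = ["Time"]) into the per-group reference metadata: the added line copies each tracked column's value from the first row of the treatment group into ref_cols alongside the grouping values. Only the column names and the dict(zip(...)) pattern come from this change; the miniature dataframe and its values are invented for illustration.

import pandas as pd

# Hypothetical single treatment group: one well on one plate, imaged at one time point.
group_treatdf = pd.DataFrame({
    "Metadata_Plate": ["plate_A"] * 3,
    "Metadata_Well": ["B02"] * 3,
    "Cell_type": ["cell_line_X"] * 3,
    "Time": [48, 48, 48],
})

filt_cols = ["Metadata_Plate", "Metadata_Well", "Cell_type"]   # grouping columns (subset)
tracked_cols = ["Time"]                                        # carried along, not grouped on

# Grouping values identify the treatment group being compared ...
ref_cols = dict(zip(filt_cols, ["plate_A", "B02", "cell_line_X"]))

# ... and each tracked column contributes the value from the group's first row,
# mirroring the added line in get_treatment_comparison.
ref_cols = {**ref_cols, **dict(zip(tracked_cols, group_treatdf[tracked_cols].iloc[0]))}

# ref_cols is now:
# {'Metadata_Plate': 'plate_A', 'Metadata_Well': 'B02', 'Cell_type': 'cell_line_X', 'Time': 48}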