Binary file not shown.
@@ -16,10 +16,10 @@
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.149881Z",
"iopub.status.busy": "2024-02-13T16:50:33.149461Z",
"iopub.status.idle": "2024-02-13T16:50:33.787799Z",
"shell.execute_reply": "2024-02-13T16:50:33.787164Z"
"iopub.execute_input": "2024-02-15T02:41:40.916400Z",
"iopub.status.busy": "2024-02-15T02:41:40.916035Z",
"iopub.status.idle": "2024-02-15T02:41:41.540995Z",
"shell.execute_reply": "2024-02-15T02:41:41.540291Z"
},
"jukit_cell_id": "551uyQMpHm"
},
@@ -50,10 +50,10 @@
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.791419Z",
"iopub.status.busy": "2024-02-13T16:50:33.790943Z",
"iopub.status.idle": "2024-02-13T16:50:33.795904Z",
"shell.execute_reply": "2024-02-13T16:50:33.795140Z"
"iopub.execute_input": "2024-02-15T02:41:41.544680Z",
"iopub.status.busy": "2024-02-15T02:41:41.544333Z",
"iopub.status.idle": "2024-02-15T02:41:41.549278Z",
"shell.execute_reply": "2024-02-15T02:41:41.548630Z"
},
"jukit_cell_id": "2h8CHkah8U"
},
@@ -91,10 +91,10 @@
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.800109Z",
"iopub.status.busy": "2024-02-13T16:50:33.799251Z",
"iopub.status.idle": "2024-02-13T16:50:33.817524Z",
"shell.execute_reply": "2024-02-13T16:50:33.816303Z"
"iopub.execute_input": "2024-02-15T02:41:41.552089Z",
"iopub.status.busy": "2024-02-15T02:41:41.551720Z",
"iopub.status.idle": "2024-02-15T02:41:41.569857Z",
"shell.execute_reply": "2024-02-15T02:41:41.569152Z"
},
"jukit_cell_id": "zDRghRntC5"
},
@@ -111,14 +111,14 @@
"# Path to the plate probability data\n",
"proba_path = pathlib.Path(f\"{big_drive_path}/class_balanced_log_reg_probability_sc_data\")\n",
"\n",
"# Path to the platemap\n",
"bar_plate_path = f\"{ref_path}/barcode_platemap.csv\"\n",
"\n",
"# Paths of each plate file\n",
"proba_plate_paths = proba_path.glob(\"*.parquet\")\n",
"\n",
"# Define barcode platemap dataframe\n",
"barcode_platemapdf = pd.read_csv(bar_plate_path)\n",
"barcode_platemapdf = pd.read_csv(f\"{ref_path}/barcode_platemap.csv\")\n",
"\n",
"# Define experiment metadata dataframe\n",
"exmetadf = pd.read_csv(f\"{ref_path}/experiment-metadata.tsv\", sep=\"\\t\")\n",
"\n",
"# Metadata and platemap paths and the name of the treatment_columns for each treatment type\n",
"treatment_data = {\n",
@@ -154,10 +154,10 @@
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.822937Z",
"iopub.status.busy": "2024-02-13T16:50:33.822451Z",
"iopub.status.idle": "2024-02-13T16:50:33.826910Z",
"shell.execute_reply": "2024-02-13T16:50:33.826184Z"
"iopub.execute_input": "2024-02-15T02:41:41.573813Z",
"iopub.status.busy": "2024-02-15T02:41:41.573429Z",
"iopub.status.idle": "2024-02-15T02:41:41.577151Z",
"shell.execute_reply": "2024-02-15T02:41:41.576572Z"
},
"jukit_cell_id": "bvzjj5dqSv"
},
@@ -182,10 +182,10 @@
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.829875Z",
"iopub.status.busy": "2024-02-13T16:50:33.829563Z",
"iopub.status.idle": "2024-02-13T16:50:33.833903Z",
"shell.execute_reply": "2024-02-13T16:50:33.833213Z"
"iopub.execute_input": "2024-02-15T02:41:41.580001Z",
"iopub.status.busy": "2024-02-15T02:41:41.579503Z",
"iopub.status.idle": "2024-02-15T02:41:41.583510Z",
"shell.execute_reply": "2024-02-15T02:41:41.582894Z"
},
"jukit_cell_id": "nSE3TR3l3H"
},
@@ -218,6 +218,32 @@
"# Process the data"
]
},
{
"cell_type": "markdown",
"metadata": {
"jukit_cell_id": "g3ZLCQHWe0"
},
"source": [
"## Combine barcode platemap and experiment metadata"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-15T02:41:41.586598Z",
"iopub.status.busy": "2024-02-15T02:41:41.586224Z",
"iopub.status.idle": "2024-02-15T02:41:41.593473Z",
"shell.execute_reply": "2024-02-15T02:41:41.592850Z"
},
"jukit_cell_id": "uHP6MeePWG"
},
"outputs": [],
"source": [
"barcode_platemapdf = pd.merge(barcode_platemapdf, exmetadf, how=\"inner\", on=[\"Assay_Plate_Barcode\", \"Plate_Map_Name\"])"
]
},
{
"cell_type": "markdown",
"metadata": {
@@ -229,13 +255,13 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.836837Z",
"iopub.status.busy": "2024-02-13T16:50:33.836481Z",
"iopub.status.idle": "2024-02-13T16:50:33.842210Z",
"shell.execute_reply": "2024-02-13T16:50:33.841571Z"
"iopub.execute_input": "2024-02-15T02:41:41.596349Z",
"iopub.status.busy": "2024-02-15T02:41:41.596036Z",
"iopub.status.idle": "2024-02-15T02:41:41.603030Z",
"shell.execute_reply": "2024-02-15T02:41:41.602399Z"
},
"jukit_cell_id": "JZE2tY7CHu"
},
@@ -288,13 +314,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.845051Z",
"iopub.status.busy": "2024-02-13T16:50:33.844733Z",
"iopub.status.idle": "2024-02-13T16:50:33.849386Z",
"shell.execute_reply": "2024-02-13T16:50:33.848746Z"
"iopub.execute_input": "2024-02-15T02:41:41.605904Z",
"iopub.status.busy": "2024-02-15T02:41:41.605440Z",
"iopub.status.idle": "2024-02-15T02:41:41.609808Z",
"shell.execute_reply": "2024-02-15T02:41:41.609243Z"
},
"jukit_cell_id": "8a6YT36HEf"
},
@@ -317,13 +343,13 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.852375Z",
"iopub.status.busy": "2024-02-13T16:50:33.851961Z",
"iopub.status.idle": "2024-02-13T16:50:33.855510Z",
"shell.execute_reply": "2024-02-13T16:50:33.854938Z"
"iopub.execute_input": "2024-02-15T02:41:41.612693Z",
"iopub.status.busy": "2024-02-15T02:41:41.612165Z",
"iopub.status.idle": "2024-02-15T02:41:41.615241Z",
"shell.execute_reply": "2024-02-15T02:41:41.614736Z"
},
"jukit_cell_id": "19p1I4wojL"
},
@@ -348,20 +374,23 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T16:50:33.858312Z",
"iopub.status.busy": "2024-02-13T16:50:33.857881Z",
"iopub.status.idle": "2024-02-13T17:41:23.799885Z",
"shell.execute_reply": "2024-02-13T17:41:23.799345Z"
"iopub.execute_input": "2024-02-15T02:41:41.617660Z",
"iopub.status.busy": "2024-02-15T02:41:41.617365Z",
"iopub.status.idle": "2024-02-15T04:27:43.938994Z",
"shell.execute_reply": "2024-02-15T04:27:43.938561Z"
},
"jukit_cell_id": "9pgP4YdWo4"
},
"outputs": [],
"source": [
"# Define columns to group by\n",
"filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well']\n",
"filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well', 'Cell_type']\n",
"\n",
"# Columns of interest which should also be tracked\n",
"tracked_cols = [\"Time\"]\n",
"\n",
"# Store phenotype column names\n",
"phenotype_cols = None\n",
@@ -386,7 +415,8 @@
" common_broaddf.loc[common_broaddf[\"control_type\"] != \"negcon\"],\n",
" common_broaddf.loc[common_broaddf[\"control_type\"] == \"negcon\"],\n",
" phenotype_cols,\n",
" filt_cols\n",
" filt_cols,\n",
" tracked_cols\n",
" )\n",
"\n",
" # Define the comparisons data structure for the first time\n",
@@ -410,13 +440,13 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 11,
"metadata": {
"execution": {
"iopub.execute_input": "2024-02-13T17:41:23.803502Z",
"iopub.status.busy": "2024-02-13T17:41:23.803140Z",
"iopub.status.idle": "2024-02-13T17:41:24.546385Z",
"shell.execute_reply": "2024-02-13T17:41:24.545800Z"
"iopub.execute_input": "2024-02-15T04:27:43.941786Z",
"iopub.status.busy": "2024-02-15T04:27:43.941273Z",
"iopub.status.idle": "2024-02-15T04:27:44.735628Z",
"shell.execute_reply": "2024-02-15T04:27:44.734984Z"
},
"jukit_cell_id": "KcqfcwXHtJ"
},
@@ -58,14 +58,14 @@
# Path to the plate probability data
proba_path = pathlib.Path(f"{big_drive_path}/class_balanced_log_reg_probability_sc_data")

# Path to the platemap
bar_plate_path = f"{ref_path}/barcode_platemap.csv"

# Paths of each plate file
proba_plate_paths = proba_path.glob("*.parquet")

# Define barcode platemap dataframe
barcode_platemapdf = pd.read_csv(bar_plate_path)
barcode_platemapdf = pd.read_csv(f"{ref_path}/barcode_platemap.csv")

# Define experiment metadata dataframe
exmetadf = pd.read_csv(f"{ref_path}/experiment-metadata.tsv", sep="\t")

# Metadata and platemap paths and the name of the treatment_columns for each treatment type
treatment_data = {
@@ -122,11 +122,19 @@ def perform_ks_test(_dmso_probs, _treatment_probs):
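# The body of perform_ks_test is collapsed in this view. As a hypothetical sketch only
# (not part of this diff and not necessarily the repository's implementation), a
# two-sample Kolmogorov-Smirnov comparison of DMSO versus treatment phenotype
# probabilities could be written with scipy like this:
from scipy import stats

def ks_test_sketch(_dmso_probs, _treatment_probs):
    # Compare the distributions of predicted phenotype probabilities between
    # negative-control (DMSO) cells and treated cells; return the KS statistic
    # and the p-value of the two-sample test.
    result = stats.ks_2samp(_dmso_probs, _treatment_probs)
    return result.statistic, result.pvalue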

# # Process the data

# ## Combine the model probability and plate data
# ## Combine barcode platemap and experiment metadata

# In[6]:


barcode_platemapdf = pd.merge(barcode_platemapdf, exmetadf, how="inner", on=["Assay_Plate_Barcode", "Plate_Map_Name"])
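# A hypothetical miniature example of what the inner merge above does (plate and map
# names below are invented; only the join columns, Cell_type, and Time appear in this
# change): plates present in both tables keep their row and gain the experiment
# metadata columns, while unmatched plates are dropped.
import pandas as pd

_bar_example = pd.DataFrame({
    "Assay_Plate_Barcode": ["plate_A", "plate_B"],
    "Plate_Map_Name": ["map_1", "map_1"],
})
_meta_example = pd.DataFrame({
    "Assay_Plate_Barcode": ["plate_A"],
    "Plate_Map_Name": ["map_1"],
    "Cell_type": ["cell_line_X"],
    "Time": [48],
})

# Only plate_A survives the inner join; it carries Cell_type and Time forward,
# which the downstream grouping (filt_cols) and tracking (tracked_cols) rely on.
_merged_example = pd.merge(
    _bar_example, _meta_example, how="inner", on=["Assay_Plate_Barcode", "Plate_Map_Name"]
)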


# ## Combine the model probability and plate data

# In[7]:


def combine_meta(probadf):
"""
Parameters
@@ -172,7 +180,7 @@ def combine_meta(probadf):
return common_broaddf


# In[7]:
# In[8]:


# Fill blank broad samples in the broad_sample column with DMSO.
@@ -183,7 +191,7 @@ def combine_meta(probadf):

# ## Defining tests and aggregation metric names

# In[8]:
# In[9]:


# Create a dictionary where the keys represent the name of the comparison or test, and the values are dictionaries
@@ -196,11 +204,14 @@ def combine_meta(probadf):

# ## Compare treatments and negative controls

# In[9]:
# In[10]:


# Define columns to group by
filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well']
filt_cols = ['Metadata_Plate', 'treatment', 'Metadata_model_type', 'treatment_type', 'Metadata_Well', 'Cell_type']

# Columns of interest which should also be tracked
tracked_cols = ["Time"]

# Store phenotype column names
phenotype_cols = None
@@ -225,7 +236,8 @@ def combine_meta(probadf):
common_broaddf.loc[common_broaddf["control_type"] != "negcon"],
common_broaddf.loc[common_broaddf["control_type"] == "negcon"],
phenotype_cols,
filt_cols
filt_cols,
tracked_cols
)

# Define the comparisons data structure for the first time
@@ -240,7 +252,7 @@ def combine_meta(probadf):

# ## Save the output of the treatment

# In[10]:
# In[11]:


comparisons = pd.DataFrame(comparisons)
12 changes: 10 additions & 2 deletions 3.analyze_data/utils/well_significance_testing.py
@@ -96,7 +96,7 @@ def samp_well(_well_samp):

return _welldf.groupby('Metadata_Well', group_keys=False).apply(samp_well)

def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_cols, _filt_cols, _control_cutoff = 50, _treat_cutoff = 50):
def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_cols, _filt_cols, _tracked_cols = [], _control_cutoff = 50, _treat_cutoff = 50):
"""
This function is intended to preprocess the predicted MitoCheck phenotype probability data prior to comparing the phenotype predicted probabilities.
Please refer to the README for additional information on how the treatment and control groups are compared.
@@ -120,6 +120,9 @@ def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_co
_filt_cols: List
The names of the columns to group the treatment cells by before comparing the probabilities.

_tracked_cols: List
(Optional) The names of the columns to be tracked in the final output in addition to _filt_cols and _phenotype_cols.

_control_cutoff: Integer
(Optional default=50) The minimum number of cells required for a negative control well to be included in the comparison.

@@ -140,9 +143,14 @@ def get_treatment_comparison(_comp_functions, _treatdf, _negcondf, _phenotype_co
# The columns for keeping track of metadata and filtering the negative control cells
ref_cols = dict(zip(_filt_cols, filt_col_vals))

# Include other columns of interest not directly used for comparing probabilities if specified
if _tracked_cols:
ref_cols = {**ref_cols, **dict(zip(_tracked_cols, group_treatdf[_tracked_cols].iloc[0]))}

# The negative control cells
group_negdf = _negcondf.loc[(_negcondf["Metadata_Plate"] == ref_cols["Metadata_Plate"]) &
(_negcondf["Metadata_model_type"] == ref_cols["Metadata_model_type"])
(_negcondf["Metadata_model_type"] == ref_cols["Metadata_model_type"]) &
(_negcondf["Cell_type"] == ref_cols["Cell_type"])
]

# Remove wells if the cell count is below the corresponding threshold
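For orientation, the sketch below shows, under hypothetical data, how the new _tracked_cols argument is intended to flow from the notebook call (filt_cols plus tracked_cols = ["Time"]) into the per-group reference metadata: the added line copies each tracked column's value from the first row of the treatment group into ref_cols alongside the grouping values. Only the column names and the dict(zip(...)) pattern come from this change; the miniature dataframe and its values are invented for illustration.

import pandas as pd

# Hypothetical single treatment group: one well on one plate, imaged at one time point.
group_treatdf = pd.DataFrame({
    "Metadata_Plate": ["plate_A"] * 3,
    "Metadata_Well": ["B02"] * 3,
    "Cell_type": ["cell_line_X"] * 3,
    "Time": [48, 48, 48],
})

filt_cols = ["Metadata_Plate", "Metadata_Well", "Cell_type"]   # grouping columns (subset)
tracked_cols = ["Time"]                                        # carried along, not grouped on

# Grouping values identify the treatment group being compared ...
ref_cols = dict(zip(filt_cols, ["plate_A", "B02", "cell_line_X"]))

# ... and each tracked column contributes the value from the group's first row,
# mirroring the added line in get_treatment_comparison.
ref_cols = {**ref_cols, **dict(zip(tracked_cols, group_treatdf[tracked_cols].iloc[0]))}

# ref_cols is now:
# {'Metadata_Plate': 'plate_A', 'Metadata_Well': 'B02', 'Cell_type': 'cell_line_X', 'Time': 48}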