diff --git a/.DS_Store b/.DS_Store index 876889c..e34fb84 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/sceptre_package/sceptre/data/covariate_matrix.rda b/sceptre_package/sceptre/data/covariate_matrix.rda deleted file mode 100644 index 2e40079..0000000 Binary files a/sceptre_package/sceptre/data/covariate_matrix.rda and /dev/null differ diff --git a/sceptre_package/sceptre/data/expressions.rda b/sceptre_package/sceptre/data/expressions.rda deleted file mode 100644 index 70b38dd..0000000 Binary files a/sceptre_package/sceptre/data/expressions.rda and /dev/null differ diff --git a/sceptre_package/sceptre/data/gRNA_indicators.rda b/sceptre_package/sceptre/data/gRNA_indicators.rda deleted file mode 100644 index da8c2c5..0000000 Binary files a/sceptre_package/sceptre/data/gRNA_indicators.rda and /dev/null differ diff --git a/sceptre_paper/.DS_Store b/sceptre_paper/.DS_Store index b5c61d6..fdaaac1 100644 Binary files a/sceptre_paper/.DS_Store and b/sceptre_paper/.DS_Store differ diff --git a/sceptre_paper/analysis_drivers/.DS_Store b/sceptre_paper/analysis_drivers/.DS_Store index 7572646..90ead10 100644 Binary files a/sceptre_paper/analysis_drivers/.DS_Store and b/sceptre_paper/analysis_drivers/.DS_Store differ diff --git a/sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_4.1.R b/sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_5.1.R similarity index 67% rename from sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_4.1.R rename to sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_5.1.R index 16f7ebd..f9e0b3c 100644 --- a/sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_4.1.R +++ b/sceptre_paper/analysis_drivers/analysis_drivers_xie/create_monocle_object_5.1.R @@ -1,22 +1,26 @@ code_dir <- paste0(.get_config_path("LOCAL_CODE_DIR"), "sceptre-manuscript") offsite_dir <- .get_config_path("LOCAL_SCEPTRE_DATA_DIR") source(paste0(code_dir, "/sceptre_paper/analysis_drivers/analysis_drivers_xie/paths_to_dirs.R")) +analysis_ready_dir <- paste0(offsite_dir, "data/xie/analysis_ready") # Load the necessary data -highly_expressed_genes <- readRDS(paste0(processed_dir, "/highly_expressed_genes.rds")) -gene_ids <- as.character(readRDS(paste0(processed_dir, "/ordered_gene_ids.RDS"))) -cell_gene_expression_matrix_info <- readRDS(paste0(processed_dir, "/exp_mat_metadata.rds")) -cell_gene_expression_matrix_info$backingfile <- paste0(processed_dir, "/expression_matrix") +cell_gene_expression_matrix_info <- readRDS(paste0(analysis_ready_dir, "/exp_mat_sub_metadata.rds")) +cell_gene_expression_matrix_info$backingfile <- paste0(analysis_ready_dir, "/expression_matrix_sub") cell_gene_expression_matrix <- cell_gene_expression_matrix_info %>% load_fbm -gRNA_mat <- fst::read_fst(path = paste0(processed_dir, "/gRNA_indicator_matrix.fst")) -cell_subset <- readRDS(paste0(processed_dir, "/cell_subset.rds")) -covariate_matrix <- read.fst(paste0(processed_dir, "/covariate_model_matrix.fst")) +gene_ids <- as.character(readRDS(paste0(processed_dir, "/ordered_gene_ids.RDS"))) +gRNA_mat <- fst::read_fst(path = paste0(analysis_ready_dir, "/gRNA_indicator_matrix.fst")) +gene_gRNA_pairs <- fst::read_fst(paste0(processed_dir, "/gRNA_gene_pairs.fst")) +gene_ids_used <- unique(gene_gRNA_pairs$gene_id) %>% as.character() +covariate_matrix <- fst::read_fst(paste0(analysis_ready_dir, "/covariate_model_matrix.fst")) # Obtain the expression matrix -gene_idxs <- match(x = highly_expressed_genes, table = gene_ids) +gene_idxs <- match(x = gene_ids_used, table = gene_ids) exp_mat <- Matrix::Matrix(data = cell_gene_expression_matrix[,gene_idxs], sparse = TRUE) exp_mat <- Matrix::t(exp_mat) +# ensure the dimensions of expression matrix, covariate matrix, and gRNA matrix match +dim(exp_mat); dim(covariate_matrix); dim(gRNA_mat) + # obtain the global covariate matrix global_covariate_matrix <- cbind(covariate_matrix, gRNA_mat) @@ -24,12 +28,9 @@ global_covariate_matrix <- cbind(covariate_matrix, gRNA_mat) feature_df <- data.frame(id = gene_ids[gene_idxs], gene_short_name = "NA") -# subset exp_mat and global_covariate_matrix according to cell_subset -exp_mat <- exp_mat[,cell_subset] - # assign column and row names to the data frames and matrices # cell names first -cell_names <- paste0("cell", seq(1, length(cell_subset))) +cell_names <- paste0("cell", seq(1, (ncol(exp_mat)))) colnames(exp_mat) <- cell_names row.names(global_covariate_matrix) <- cell_names # feature names second @@ -56,4 +57,4 @@ cds <- estimateSizeFactors(cds) cds <- estimateDispersions(cds) # save the monocle object -saveRDS(object = cds, file = paste0(processed_dir, "/monocole_obj.rds")) +saveRDS(object = cds, file = paste0(analysis_ready_dir, "/monocole_obj.rds")) diff --git a/sceptre_paper/analysis_drivers/analysis_drivers_xie/quality_control_4.R b/sceptre_paper/analysis_drivers/analysis_drivers_xie/quality_control_4.R index dc7d6d1..cea6afa 100644 --- a/sceptre_paper/analysis_drivers/analysis_drivers_xie/quality_control_4.R +++ b/sceptre_paper/analysis_drivers/analysis_drivers_xie/quality_control_4.R @@ -44,15 +44,20 @@ global_covariate_matrix <- data.frame(log_n_umis = log(n_umis_per_cell[cell_subs batch = gRNA_covariate_matrix_sub$batch) # save -saveRDS(object = cell_subset, file = paste0(processed_dir, "/cell_subset.rds")) +# create a new directory, "analysis_ready" to store all data in analysis-ready format. +analysis_ready_dir <- paste0(offsite_dir, "data/xie/analysis_ready") + fst::write_fst(x = global_covariate_matrix, - path = paste0(processed_dir, "/covariate_model_matrix.fst")) -saveRDS(object = highly_expressed_genes, file = paste0(processed_dir, "/highly_expressed_genes.rds")) -gRNA_indic_mat <- fst::write_fst(gRNA_indic_matrix_sub, paste0(processed_dir, "/gRNA_indicator_matrix.fst")) + path = paste0(analysis_ready_dir, "/covariate_model_matrix.fst")) +saveRDS(object = highly_expressed_genes, file = paste0(analysis_ready_dir, "/highly_expressed_genes.rds")) +gRNA_indic_mat <- fst::write_fst(gRNA_indic_matrix_sub, paste0(analysis_ready_dir, "/gRNA_indicator_matrix.fst")) ############################################### # save subsetted cell-by-gene expression matrix ############################################### + +if (!dir.exists(analysis_ready_dir)) dir.create(path = analysis_ready_dir, recursive = TRUE) + gene_ids <- readRDS(paste0(processed_dir, "/ordered_gene_ids.RDS")) exp_mat <- readRDS(paste0(processed_dir, "/exp_mat_metadata.rds")) %>% load_fbm() exp_mat_mem <- exp_mat[,seq(1, ncol(exp_mat))] @@ -61,11 +66,12 @@ exp_mat_sub_disk <- FBM(nrow = nrow(exp_mat_sub), ncol = ncol(exp_mat_sub), type = "unsigned short", init = 0, - backingfile = paste0(processed_dir, "/expression_matrix_sub"), + backingfile = paste0(analysis_ready_dir, "/expression_matrix_sub"), create_bk = TRUE) exp_mat_sub_disk[1:nrow(exp_mat_sub), 1:ncol(exp_mat_sub)] <- exp_mat_sub exp_mat_sub_metadata <- list(nrow = nrow(exp_mat_sub), ncol = ncol(exp_mat_sub), type = "unsigned short", - backingfile = paste0(processed_dir, "/expression_matrix_sub")) -saveRDS(object = exp_mat_sub_metadata, paste0(processed_dir, "/exp_mat_sub_metadata.rds")) + backingfile = paste0(analysis_ready_dir, "/expression_matrix_sub")) +saveRDS(object = exp_mat_sub_metadata, paste0(analysis_ready_dir, "/exp_mat_sub_metadata.rds")) + diff --git a/sceptre_paper/utilities/run_xie_analysis.bash b/sceptre_paper/utilities/run_xie_analysis.bash index c716f28..9743b3a 100644 --- a/sceptre_paper/utilities/run_xie_analysis.bash +++ b/sceptre_paper/utilities/run_xie_analysis.bash @@ -33,12 +33,13 @@ Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"pre_proc echo Construct model covariate matrix and perform quality control. Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"quality_control_4.R -echo Create monocole object for monocole NB analysis. -Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"create_monocle_object_4.1.R - echo Determine the gene-gRNA pairs to analyze. Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"select_gRNA_gene_pair_5.R +echo Create monocole object for monocole NB analysis. +Rscript $code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"create_monocle_object_5.1.R + + # Locate the parameter file parameter_file=$code_dir"/sceptre_paper/analysis_drivers/analysis_drivers_xie/"sceptre_function_args.R diff --git a/sceptre_paper/utilities/run_xie_analysis_fix.bash b/sceptre_paper/utilities/run_xie_analysis_fix.bash deleted file mode 100644 index 8ee96e3..0000000 --- a/sceptre_paper/utilities/run_xie_analysis_fix.bash +++ /dev/null @@ -1,3 +0,0 @@ -# Compute covariate matrix - -# get pairs to analyze (highly expressed, protein-coding genes only)