Fix output paths in map_snps_to_genes for Windows

neurogenomics · Mar 1, 2023 · 8ad8fd3 · 8ad8fd3
1 parent d4c476a
commit 8ad8fd3
Show file tree

Hide file tree

Showing 27 changed files with 268 additions and 150 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -50,7 +50,6 @@ importFrom(data.table,rbindlist)
 importFrom(data.table,setcolorder)
 importFrom(data.table,setkey)
 importFrom(data.table,setnames)
-importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)
 importFrom(dplyr,filter)
 importFrom(dplyr,mutate)

diff --git a/NEWS.md b/NEWS.md
@@ -18,6 +18,14 @@
     - Update example enrichment results to new format with nested `levels`.
 * Add *CITATION* file.
 * Add citation info and volcano/cell icons to startup message.
+* Replace `%>%` with `|>` syntax.
+* `celltype_associations_pipeline`:
+    - Print error messages.
+* `get_ctd`
+    - Add new *ctd_allAIBS*: https://github.com/neurogenomics/MAGMA_Celltyping/issues
+* `map_snps_to_genes`
+    - Fix `genesOut` path for Windows.
+    - Subfunctionalize: `check_n`, `check_genomeLocFile`
 
 ## Bug fixes 
 
@@ -26,6 +34,8 @@
     - Fix NAs issue by ensuring celltype names in `ctAssocs` and `ctdDendro`
         are both run through `EWCE::fix_celltype_names()` 
         when creating an ordered factor.
+* `merge_magma_results`
+    - Add names to levels 
 
 
 # MAGMA.Celltyping 2.0.7

diff --git a/R/calculate_celltype_associations.r b/R/calculate_celltype_associations.r
@@ -233,7 +233,7 @@ calculate_celltype_associations <- function(ctd,
             ControlForCT = "BASELINE"
         ) 
         return(tmp)
-    }) %>% `names<-`(paste0("level",ctd_levels)) # //End lapply loop
+    }) |> `names<-`(paste0("level",ctd_levels)) # //End lapply loop
 
     #### Calculate total number of tests performed ####
     totalTests <- 0

diff --git a/R/celltype_associations_pipeline.r b/R/celltype_associations_pipeline.r
@@ -84,6 +84,10 @@ celltype_associations_pipeline <- function(ctd,
     ctAssocsTop <- NULL
     ctCondAssocs <- NULL
     ctAssocMerged <- NULL
+    #### Check required inputs ####
+    force(ctd)
+    force(ctd_name)
+    force(magma_dirs) 
     #### prepare quantile groups ####
     # MAGMA.Celltyping can only use human GWAS
     {
@@ -108,7 +112,7 @@ celltype_associations_pipeline <- function(ctd,
                                               upstream_kb = upstream_kb,
                                               downstream_kb = downstream_kb)
         #### Linear mode ####
-        if (run_linear) {
+        if (isTRUE(run_linear)) {
             messager("=======",
                      "Calculating celltype associations: linear mode",
                      "=======",
@@ -129,12 +133,11 @@ celltype_associations_pipeline <- function(ctd,
                     version = version,
                     verbose = verbose
                 )
-            }, error = function(e) {NULL}
+            }, error = function(e) {messager(e,v=verbose);NULL}
             )
-        }
-
+        } 
         #### Top 10% mode ####
-        if (run_top10) {
+        if (isTRUE(run_top10)) {
             messager("=======",
                      "Calculating celltype associations: top10% mode",
                      "=======",
@@ -155,11 +158,9 @@ celltype_associations_pipeline <- function(ctd,
                     version = version,
                     verbose = verbose
                 )
-            }, error = function(e) {NULL}
+            }, error = function(e) {messager(e,v=verbose);NULL}
             )
-        }
-
-
+        } 
         #### Merge results ####
         if (all(!is.null(ctAssocsLinear), !is.null(ctAssocsTop))) {
             messager("Merging linear and top10% results",
@@ -169,10 +170,9 @@ celltype_associations_pipeline <- function(ctd,
                 ctAssoc1 = ctAssocsLinear,
                 ctAssoc2 = ctAssocsTop
             )
-        }
-
+        } 
         #### Conditional mode ####
-        if (run_conditional) {
+        if (isTRUE(run_conditional)) {
             messager("=======",
                      "Calculating celltype associations: conditional mode",
                      "=======",
@@ -197,7 +197,7 @@ celltype_associations_pipeline <- function(ctd,
                         version = version,
                         verbose = verbose
                     )
-                }, error = function(e) {NULL}
+                }, error = function(e) {messager(e,v=verbose);NULL}
             )
         }
 
@@ -208,9 +208,8 @@ celltype_associations_pipeline <- function(ctd,
             ctAssocMerged = ctAssocMerged,
             ctCondAssocs = ctCondAssocs
         ))
-    }, mc.cores = nThread) %>% `names<-`(basename(magma_dirs))
-
-
+    }, mc.cores = nThread) |> `names<-`(basename(magma_dirs))
+    #### Save results ####
     if (!is.null(save_dir)) {
         save_path <- file.path(
             save_dir, ctd_name,

diff --git a/R/check_genomeLocFile.R b/R/check_genomeLocFile.R
@@ -0,0 +1,13 @@
+#' Check genome build and get the matching gene location file
+#'
+#' Maps a user-supplied genome build name (case-insensitive) onto one of
+#' the builds passed to \code{get_genomeLocFile}: "GRCH36", "GRCH37"
+#' (aliases: "HG37", "HG19") or "GRCH38" (alias: "HG38").
+#'
+#' @param genome_build Genome build name (a single string).
+#' @param path_formatted Not used in the body of this function.
+#' @returns The output of \code{get_genomeLocFile} for the matched build.
+#'
+#' @keywords internal
+check_genomeLocFile <- function(genome_build,
+                                path_formatted){
+    err_msg <- "Genome build must be: 'GRCH36', 'GRCH37', or 'GRCH38'"
+    #### NULL or multi-value input would make the if() conditions below ####
+    #### fail with a cryptic error, so report the expected values instead ####
+    if (length(genome_build) != 1) {
+        stop(err_msg)
+    }
+    #### Normalise case once rather than in every branch ####
+    build <- toupper(genome_build)
+    if (build %in% c("GRCH36")) {
+        genomeLocFile <- get_genomeLocFile(build = "GRCH36")
+    } else if (build %in% c("GRCH37","HG37","HG19")) {
+        genomeLocFile <- get_genomeLocFile(build = "GRCH37")
+    } else if (build %in% c("GRCH38","HG38")) {
+        genomeLocFile <- get_genomeLocFile(build = "GRCH38")
+    } else {
+        stop(err_msg)
+    }
+    return(genomeLocFile)
+}
diff --git a/R/check_n.R b/R/check_n.R
@@ -0,0 +1,41 @@
+#' Check sample size (N) argument
+#'
+#' Builds the sample-size argument string from \code{N}. When \code{N} is
+#' not supplied, the header of the sumstats file is checked for an "N"
+#' column and, failing that, the user is asked for a value via
+#' \code{readline}.
+#'
+#' @param path_formatted Path to a tab-delimited sumstats file; only its
+#'  first line is read, and only when \code{N} is \code{NULL}/\code{NA}.
+#' @param N Sample size of the GWAS, or \code{NULL}/\code{NA}.
+#' @returns A string: \code{"ncol=N"} when the file has an "N" column,
+#'  otherwise \code{"N=<value>"}.
+#'
+#' @keywords internal
+check_n <- function(path_formatted,
+                    N){
+
+    #### Use short-circuiting `||`: `is.null(N) | is.na(N)` evaluates ####
+    #### to logical(0) when N is NULL, which makes if() throw an error ####
+    if (is.null(N) || all(is.na(N))) {
+        #### Look for an N column in the file header ####
+        first_line <- readLines(path_formatted, n = 1)
+        column_headers <- strsplit(first_line, "\t")[[1]]
+        if ("N" %in% column_headers) {
+            n_arg <- "ncol=N"
+        } else {
+            #### No N column: ask the user ####
+            nval <- as.numeric(
+                readline(paste(
+                    "There is no N column within the sumstats file.",
+                    "What is the N value for this GWAS?"
+                ))
+            )
+
+            if (is.na(nval)) {
+                stop(paste(
+                    nval, "provided but value of N for",
+                    "the GWAS must be numeric"
+                ))
+            }
+            #### Sanity-check the range of the supplied value ####
+            if (nval < 1000) {
+                stop(paste(
+                    "Value of N provided is less than 1,000.",
+                    "This seems unlikely."
+                ))
+            }
+            if (nval > 100000000) {
+                stop(paste(
+                    "Value of N provided is over than 100,000,000.",
+                    "This seems unlikely."
+                ))
+            }
+            n_arg <- sprintf("N=%s", nval)
+        }
+    } else {
+        #### N supplied by the caller: use it as-is ####
+        n_arg <- sprintf("N=%s", N)
+    }
+    return(n_arg)
+}
diff --git a/R/data.r b/R/data.r
@@ -26,16 +26,16 @@
 #'                                  method = "gprofiler",
 #'                                  target = "ENTREZGENE_ACC",
 #'                                  ensure_filter_nas = FALSE)
-#' hgnc2entrez_orthogene <- gene_map %>%
+#' hgnc2entrez_orthogene <- gene_map |>
 #'     dplyr::select(hgnc_symbol = Gene.Symbol,
-#'                   entrez = target) %>%
+#'                   entrez = target) |>
 #'     unique()
 #' #### Compare to other dataset ####
-#' dt1 <- hgnc2entrez %>% dplyr::filter(!hgnc_symbol %in% c(NA,""),
-#'                                      !entrez %in% c(NA,"")) %>%
+#' dt1 <- hgnc2entrez |> dplyr::filter(!hgnc_symbol %in% c(NA,""),
+#'                                      !entrez %in% c(NA,"")) |>
 #'     unique()
-#' dt2 <- hgnc2entrez_orthogene %>% dplyr::filter(!hgnc_symbol %in% c(NA,""),
-#'                                                !entrez %in% c(NA,"")) %>%
+#' dt2 <- hgnc2entrez_orthogene |> dplyr::filter(!hgnc_symbol %in% c(NA,""),
+#'                                                !entrez %in% c(NA,"")) |>
 #'     unique()
 #' message("hgnc2entrez_orthogene has ",
 #'         formatC(nrow(dt2) - nrow(dt1), big.mark = ","),

diff --git a/R/fix_path.R b/R/fix_path.R
@@ -10,6 +10,8 @@
 fix_path <- function(x){
   x <- path.expand(x)
   if(get_os()=='Windows'){
+    # Remove a trailing slash to avoid errors on windows
+    x <- gsub("\\/$", "", x)
     x <- normalizePath(x, mustWork = FALSE)
   }
   return(x)

diff --git a/R/get_ctd.R b/R/get_ctd.R
@@ -12,6 +12,15 @@
 #' Allen Brain Institute website}\cr 
 #' \href{http://www.hjerling-leffler-lab.org/data/scz_singlecell/}{Source}
 #' }
+#' \item{"ctd_allAIBS"\cr}{CTD file derived from adult human cortex 
+#' scRNA-seq data collected by the Allen Institute for Brain Science (AIBS).
+#' Note that this CTD used a later release of the AIBS data that included
+#' samples from multiple human brain regions.
+#' \cr Reference: \doi{10.1038/s41586-019-1506-7}\cr
+#' \href{https://portal.brain-map.org/atlases-and-data/rnaseq/data-files-2018}{
+#' Allen Brain Institute website}\cr 
+#' \href{http://www.hjerling-leffler-lab.org/data/scz_singlecell/}{Source}
+#' }
 #' \item{"ctd_allKI"\cr}{CTD file with cortex, hippocampus,
 #' hypothalamus and midbrain.
 #' \cr Reference: \doi{10.1038/s41588-018-0129-5}\cr

diff --git a/R/get_gene_info.R b/R/get_gene_info.R
@@ -31,9 +31,9 @@ get_gene_info <- function(genelist,
     )
     gene_info$size <- gene_info$end_position - gene_info$start_position
     if (one_row_per_gene) {
-        gene_info <- gene_info %>%
-            dplyr::group_by(hgnc_symbol) %>%
-            dplyr::slice_head(n = 1) %>%
+        gene_info <- gene_info |>
+            dplyr::group_by(hgnc_symbol) |>
+            dplyr::slice_head(n = 1) |>
             data.frame()
         rownames(gene_info) <- gene_info$hgnc_symbol
     }

diff --git a/R/get_genomeLocFile.R b/R/get_genomeLocFile.R
@@ -28,7 +28,7 @@
 #'     local <- file.path(tempdir(),basename(files[x]))
 #'     utils::download.file(files[x],local)
 #'     return(local)
-#' }) %>% `names<-`(names(files))
+#' }) |> `names<-`(names(files))
 #' 
 #' for(x in names(local_files)){
 #'     piggyback::pb_upload(file = local_files[[x]],

diff --git a/R/infer_ctd_species.R b/R/infer_ctd_species.R
@@ -1,10 +1,12 @@
 #' Infer CellTypeDataset species
 #' 
-#' Infers species from from level 1 of a CellTypeDataset
-#' using \link[orthogene]{infer_species}. 
+#' Infers species from level 1 of a CellTypeDataset (CTD)
+#' using either the metadata stored in the CTD
+#'  (if the object has previously been standardised with 
+#'  \link[EWCE]{standardise_ctd}) or using the gene names
+#'   (via \link[orthogene]{infer_species}). 
 #' If \code{ctd_species} is not \code{NULL}, 
 #' this will be returned instead of inferring the species. 
-#' 
 #' @param verbose Message verbosity. 
 #' \itemize{
 #' \item{\code{0} or \code{FALSE} : }{
@@ -27,20 +29,28 @@
 #' ctd_species <- infer_ctd_species(ctd = ewceData::ctd())
 infer_ctd_species <- function(ctd, 
                               ctd_species = NULL,
-                              verbose = 1,
+                              verbose = 2,
                               ...){
     #### Mainly for adjust_zstat_in_genesOut where ctd is optional ####
     if(is.null(ctd)) return(NULL)
-    #### Infer ####
     if(is.null(ctd_species)) {
-        messager("ctd_species=NULL: Inferring species from gene names.",
-                 v=sum(verbose)>0)
-        ctd_species <- orthogene::infer_species(
-            gene_df = ctd[[1]]$mean_exp,
-            verbose = sum(verbose)>1,
-            make_plot = FALSE,
-            ...
-        )$top_match
+        #### Infer from metadata ####
+        if(!is.null(ctd[[1]]$species$output_species)){
+            messager("ctd_species=NULL: Inferring species from ctd metadata.",
+                     v=sum(verbose)>0)
+            ctd_species <- ctd[[1]]$species$output_species
+        #### Infer from genes #### 
+        } else { 
+            messager("ctd_species=NULL: Inferring species from gene names.",
+                     v=sum(verbose)>0)
+            ctd_species <- orthogene::infer_species(
+                gene_df = ctd[[1]]$mean_exp,
+                verbose = sum(verbose)>1,
+                make_plot = FALSE,
+                ...
+            )$top_match
+        } 
+        #### Report ####
         messager("Inferred ctd species:",ctd_species,
                  v=sum(verbose)>0)
     }

diff --git a/R/load_magma_results_file.r b/R/load_magma_results_file.r
@@ -135,9 +135,9 @@ load_magma_results_file <- function(path,
     res$log10p <- log(res$P, 10)
     res$genesOutCOND <- paste(genesOutCOND, collapse = " | ")
     res$EnrichmentMode <- EnrichmentMode 
-    res <- res %>%
+    res <- res |>
         dplyr::rename(Celltype = VARIABLE,
-                      OBS_GENES = NGENES) #%>%
+                      OBS_GENES = NGENES) #|>
         # purrr::modify_at(c("SET"), ~NULL)
     res$SET <- NULL
     res <- res[, c(

diff --git a/R/magma_links_gather.R b/R/magma_links_gather.R
@@ -14,7 +14,7 @@ magma_links_gather <- function(){
                                      unique_only = FALSE,
                                      filter_v = FALSE)
     meta <- data.table::data.table(link=links,
-                                   version=versions) %>%
+                                   version=versions) |>
         dplyr::arrange(dplyr::desc(version))
     #### Add column indicating which rows are the latest version #### 
     meta$latest <- meta$version==rev(versions)[1]
@@ -26,7 +26,7 @@ magma_links_gather <- function(){
         ),
         c("Mac","Windows","Linux")
     )  
-    meta <- meta %>% 
+    meta <- meta |> 
         dplyr::mutate(os=
             ifelse(
                 grepl("_source|static|icc|gpp", link), "source",