Merge pull request #28 from PNNL-Comp-Mass-Spec/update/run_plexedpiper_redox

Add redox processing capability to run_plexedpiper
PNNL-Comp-Mass-Spec · Jul 13, 2023 · c6ab690 · c6ab690
2 parents c5865fe + 38c98f4
commit c6ab690
Show file tree

Hide file tree

Showing 17 changed files with 419 additions and 336 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -1,12 +1,12 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
-
 on:
   push:
-    branches:
-      master
+    branches: [main, master]
   pull_request:
-    branches: master
+    branches: [main, master]
   workflow_dispatch:
+    branches: [main, master]
 
 name: R-CMD-check
 
@@ -20,56 +20,30 @@ jobs:
       fail-fast: false
       matrix:
         config:
+          - {os: macos-latest,   r: 'release'}
           - {os: windows-latest, r: 'release'}
-          - {os: macOS-latest, r: 'release'}
+          - {os: ubuntu-latest,   r: 'release'}
 
     env:
-      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
-      RSPM: ${{ matrix.config.rspm }}
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - name: Set up Git Repository
-        uses: actions/checkout@v2
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-pandoc@v2
 
-      - name: Set up R
-        uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
+          http-user-agent: ${{ matrix.config.http-user-agent }}
+          use-public-rspm: true
 
-      - name: Set up Pandoc
-        uses: r-lib/actions/setup-pandoc@v1
-
-      - name: Query dependencies
-        run: |
-          install.packages("devtools")
-          saveRDS(devtools::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
-        shell: Rscript {0}
-
-      - name: Restore R package cache
-        uses: actions/cache@v2
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
-
-      - name: Install dependencies
-        run: |
-          devtools::install_deps(dependencies = TRUE)
-        shell: Rscript {0}
-
-      - name: Check
-        env:
-          _R_CHECK_CRAN_INCOMING_REMOTE_: false
-        run: |
-          options(crayon.enabled = TRUE)
-          devtools::check(error_on = "error", vignettes = FALSE)
-        shell: Rscript {0}
+          extra-packages: any::rcmdcheck
+          needs: check
 
-      - name: Upload check results
-        if: failure()
-        uses: actions/upload-artifact@main
+      - uses: r-lib/actions/check-r-package@v2
         with:
-          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
-          path: check
+          upload-snapshots: true
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -1,10 +1,6 @@
 # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
-  push:
-    branches: [main, master]
-  pull_request:
-    branches: [main, master]
   release:
     types: [published]
   workflow_dispatch:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: PlexedPiper
 Type: Package
 Title: Pipeline for isobaric quantification
-Version: 0.4.1
-Date: 2023-02-07
+Version: 0.4.2
+Date: 2023-07-06
 Author: Vladislav Petyuk vladislav.petyuk@pnnl.gov
 Maintainer: Vladislav Petyuk <vladislav.petyuk@pnnl.gov>
 Description: Pipeline for isobaric quantification.
@@ -11,7 +11,7 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 Depends:
-    MSnID (>= 1.18.1)
+    MSnID (>= 1.25.2)
 Imports:
     Biostrings,
     data.table,
@@ -21,7 +21,6 @@ Imports:
     purrr,
     tibble,
     tidyr,
-    tidyselect,
     readr,
     utils
 Suggests:

diff --git a/NAMESPACE b/NAMESPACE
@@ -61,6 +61,7 @@ importFrom(data.table,setnames)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,across)
 importFrom(dplyr,all_of)
+importFrom(dplyr,any_of)
 importFrom(dplyr,arrange)
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,bind_rows)
@@ -76,20 +77,20 @@ importFrom(dplyr,mutate)
 importFrom(dplyr,n)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
+importFrom(dplyr,rowwise)
 importFrom(dplyr,select)
 importFrom(dplyr,starts_with)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarize)
 importFrom(dplyr,ungroup)
+importFrom(dplyr,where)
 importFrom(plyr,llply)
 importFrom(purrr,map)
 importFrom(purrr,reduce)
 importFrom(readr,read_tsv)
 importFrom(tibble,rownames_to_column)
 importFrom(tidyr,pivot_longer)
 importFrom(tidyr,pivot_wider)
-importFrom(tidyr,separate)
-importFrom(tidyselect,where)
 importFrom(utils,read.delim)
 importFrom(utils,read.table)
 importFrom(utils,write.table)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# PlexedPiper 0.4.2 (2023-07-06)
+
+-   Update `run_plexedpiper` to process redox proteomics data.
+-   Add more robust checks for `read_study_design` output.
+-   Set minimum MSnID version to 1.25.2, though this does not prevent installation of an incorrect version of MSnID from Bioconductor, since the version number on Bioconductor is higher.
+
 # PlexedPiper 0.4.1 (2023-02-07)
 
 -   Removed duplicate GENCODE protein IDs from `run_plexedpiper` output. GENCODE IDs are currently only unique when combining the protein (ENSP) and transcript (ENST) IDs. Since there are so few duplicates, we will remove them rather than concatenating these IDs in the "protein_id" column of the output of `make_results_ratio_*` and `make_rii_peptide_*` functions.

diff --git a/R/filter_msfragger_data.R b/R/filter_msfragger_data.R
@@ -1,46 +1,44 @@
-#' Filtering msfragger Data
+#' Filtering MSFragger Data
+#'
+#' Filtering MSFragger data. In this implementation, the peptide-level filter
+#' optimizes both ppm and one of Expectation or PeptideProphet Probability
+#' thresholds to achieve maximum number of peptide identifications within a
+#' given FDR constraint.
 #'
-#' Filtering msfragger data. In this implementation, the peptide-level filter
-#' optimizes both ppm and one of Expectation or PeptideProphet Probability thresholds
-#' to achieve maximum number of peptide identifications within a given FDR constraint.
 #' The accession-level filter optimizes based on `peptides_per_1000aa`, so
 #' \code{\link{compute_num_peptides_per_1000aa}} must be used first.
 #'
 #' @md
 #'
-#' @param msnid (MSnID object) collated msfragger output
+#' @param msnid (MSnID object) collated MSFragger output
 #' @param fdr.max (numeric) Maximum acceptable FDR. Default is 0.01 (1%).
 #' @param level (character) Level at which to perform FDR filter. The name of a
 #'   column in `psms(msnid)`. Currently, only `"peptide"` or `"accession"` are
-#'   supported. The added level `SiteID` makes sense only for PTM data and 
+#'   supported. The added level `"SiteID"` makes sense only for PTM data and
 #'   first requires mapping of the modification site using
-#'    `MSnID::map_mod_sites`.
-#' @param filtering_criterion (character) One of "evalue" which is 
-#'    expectation value or "pp_prob" - peptide prophet probability. Default is
-#'    "pp_prob".
+#'   `MSnID::map_mod_sites`.
+#' @param filtering_criterion (character) One of `"evalue"` (expectation
+#'   value) or `"pp_prob"` (PeptideProphet probability). Default is `"pp_prob"`.
 #' @param n.iter.grid (numeric) number of grid-distributed evaluation points.
 #' @param n.iter.nm (numeric) number of iterations for Nelder-Mead optimization
 #'   algorithm.
-#' @param ... arguments passed to `filter_msfragger_data`.
 #'
-#' @return (MSnID object) filtered msfragger output
+#' @return (MSnID object) filtered MSFragger output
 #'
-#' @seealso
-#' \code{\link[MSnID]{MSnIDFilter}}
-#' \code{\link[MSnID]{optimize_filter}}
-#' \code{\link[MSnID]{apply_filter}}
+#' @seealso \code{\link[MSnID]{MSnIDFilter}}
+#'   \code{\link[MSnID]{optimize_filter}} \code{\link[MSnID]{apply_filter}}
 #'
-#' @importFrom MSnID MSnIDFilter optimize_filter
-#'   mass_measurement_error apply_filter
+#' @importFrom MSnID MSnIDFilter optimize_filter mass_measurement_error
+#'   apply_filter
 
 
 #' @export
 filter_msfragger_data <- function(msnid,
-                             level,
-                             filtering_criterion = c("pp_prob","evalue"),
-                             fdr.max=0.01,
-                             n.iter.grid=500,
-                             n.iter.nm=100){
+                                  level,
+                                  filtering_criterion = c("pp_prob", "evalue"),
+                                  fdr.max=0.01,
+                                  n.iter.grid=500,
+                                  n.iter.nm=100){
 
   # Clean up on exit
   on.exit(rm(list = ls()))
@@ -49,18 +47,18 @@ filter_msfragger_data <- function(msnid,
   # Check input
   level <- match.arg(level, choices = c("peptide", "accession", "SiteID"))
   filtering_criterion <- match.arg(filtering_criterion)
-
-  if(level == "SiteID" & !("SiteID" %in% names(msnid)))
-     stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")
+
+  if (level == "SiteID" & !("SiteID" %in% names(msnid))) {
+    stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")
+  }
 
   keep_cols <- c(level, "isDecoy") # columns to calculate FDR
 
   # Create MSnID of minimum size
   suppressMessages(msnid_small <- MSnID())
-  
+
   # Setup
   if (level == "accession") {
-
     # Add filter criteria column
     keep_cols <- c(keep_cols, "peptides_per_1000aa")
     msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
@@ -69,36 +67,41 @@ filter_msfragger_data <- function(msnid,
     filtObj <- MSnIDFilter(msnid_small)
     filtObj$peptides_per_1000aa <- list(comparison = ">", threshold = 1)
     method <- "SANN"
+
   } else {
-     #Choose filter object probability value
-     if (filtering_criterion == "evalue") {
-        msnid$msmsScore <- -log10(msnid$Expectation)
-     }
-     if (filtering_criterion == "pp_prob") {
-        msnid$msmsScore <- msnid$`PeptideProphet Probability`
-     }
-     # Create columns for peptide filtering
-     # Can not use data.table syntax if the msnid has been modified at all,
-     # as it results in the "Invalid .internal.selfref" warning and
-     # columns not being created.
-     msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))
-
-     # Add filter criteria columns
-     keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
-     msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
-
-     # Create filter object
-     filtObj <- MSnIDFilter(msnid_small)
-     filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)
-     if (filtering_criterion == "evalue") {
-        filtObj$msmsScore <- list(comparison = ">", threshold = 2)
-     }
-     if (filtering_criterion == "pp_prob") {
-        filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
-     }
-     method <- "Nelder-Mead"
+    #Choose filter object probability value
+    if (filtering_criterion == "evalue") {
+      msnid$msmsScore <- -log10(msnid$Expectation)
+    }
+
+    if (filtering_criterion == "pp_prob") {
+      msnid$msmsScore <- msnid$`PeptideProphet Probability`
+    }
+
+    # Create columns for peptide filtering
+    # Can not use data.table syntax if the msnid has been modified at all,
+    # as it results in the "Invalid .internal.selfref" warning and
+    # columns not being created.
+    msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))
+
+    # Add filter criteria columns
+    keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
+    msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
+
+    # Create filter object
+    filtObj <- MSnIDFilter(msnid_small)
+    filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)
+
+    if (filtering_criterion == "evalue") {
+      filtObj$msmsScore <- list(comparison = ">", threshold = 2)
+    }
+
+    if (filtering_criterion == "pp_prob") {
+      filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
+    }
+
+    method <- "Nelder-Mead"
   }
-
 
   # step 1
   filtObj.grid <- optimize_filter(filtObj,
@@ -107,12 +110,17 @@ filter_msfragger_data <- function(msnid,
                                   method="Grid",
                                   level=level,
                                   n.iter=n.iter.grid)
+
   # step 2
   filtObj.nm <- optimize_filter(filtObj.grid,
                                 msnid_small,
                                 fdr.max=fdr.max,
                                 method=method,
                                 level=level,
                                 n.iter=n.iter.nm)
-  return(apply_filter(msnid, filtObj.nm))
+
+  msnid <- apply_filter(msnid, filtObj.nm)
+
+  return(msnid)
 }
+