Skip to content

Commit

Permalink
Merge pull request #28 from PNNL-Comp-Mass-Spec/update/run_plexedpipe…
Browse files Browse the repository at this point in the history
…r_redox

Add redox processing capability to run_plexedpiper
  • Loading branch information
TylerSagendorf authored Jul 13, 2023
2 parents c5865fe + 38c98f4 commit c6ab690
Show file tree
Hide file tree
Showing 17 changed files with 419 additions and 336 deletions.
62 changes: 18 additions & 44 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help

on:
push:
branches:
master
branches: [main, master]
pull_request:
branches: master
branches: [main, master]
workflow_dispatch:
branches: [main, master]

name: R-CMD-check

Expand All @@ -20,56 +20,30 @@ jobs:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-latest, r: 'release'}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
- name: Set up Git Repository
uses: actions/checkout@v2
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v2

- name: Set up R
uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- name: Set up Pandoc
uses: r-lib/actions/setup-pandoc@v1

- name: Query dependencies
run: |
install.packages("devtools")
saveRDS(devtools::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

- name: Restore R package cache
uses: actions/cache@v2
- uses: r-lib/actions/setup-r-dependencies@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install dependencies
run: |
devtools::install_deps(dependencies = TRUE)
shell: Rscript {0}

- name: Check
env:
_R_CHECK_CRAN_INCOMING_REMOTE_: false
run: |
options(crayon.enabled = TRUE)
devtools::check(error_on = "error", vignettes = FALSE)
shell: Rscript {0}
extra-packages: any::rcmdcheck
needs: check

- name: Upload check results
if: failure()
uses: actions/upload-artifact@main
- uses: r-lib/actions/check-r-package@v2
with:
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
path: check
upload-snapshots: true
4 changes: 0 additions & 4 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
release:
types: [published]
workflow_dispatch:
Expand Down
7 changes: 3 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: PlexedPiper
Type: Package
Title: Pipeline for isobaric quantification
Version: 0.4.1
Date: 2023-02-07
Version: 0.4.2
Date: 2023-07-06
Author: Vladislav Petyuk vladislav.petyuk@pnnl.gov
Maintainer: Vladislav Petyuk <vladislav.petyuk@pnnl.gov>
Description: Pipeline for isobaric quantification.
Expand All @@ -11,7 +11,7 @@ Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
Depends:
MSnID (>= 1.18.1)
MSnID (>= 1.25.2)
Imports:
Biostrings,
data.table,
Expand All @@ -21,7 +21,6 @@ Imports:
purrr,
tibble,
tidyr,
tidyselect,
readr,
utils
Suggests:
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ importFrom(data.table,setnames)
importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,any_of)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
Expand All @@ -76,20 +77,20 @@ importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(dplyr,where)
importFrom(plyr,llply)
importFrom(purrr,map)
importFrom(purrr,reduce)
importFrom(readr,read_tsv)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate)
importFrom(tidyselect,where)
importFrom(utils,read.delim)
importFrom(utils,read.table)
importFrom(utils,write.table)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# PlexedPiper 0.4.2 (2023-07-06)

- Update `run_plexedpiper` to process redox proteomics data.
- Add more robust checks for `read_study_design` output.
- Set the minimum MSnID version to 1.25.2. Note that this does not prevent installation of an incorrect version of MSnID from Bioconductor: the version number on Bioconductor is higher, so it still satisfies the minimum-version requirement.

# PlexedPiper 0.4.1 (2023-02-07)

- Removed duplicate GENCODE protein IDs from `run_plexedpiper` output. GENCODE IDs are currently only unique when combining the protein (ENSP) and transcript (ENST) IDs. Since there are so few duplicates, we will remove them rather than concatenating these IDs in the "protein_id" column of the output of `make_results_ratio_*` and `make_rii_peptide_*` functions.
Expand Down
122 changes: 65 additions & 57 deletions R/filter_msfragger_data.R
Original file line number Diff line number Diff line change
@@ -1,46 +1,44 @@
#' Filtering msfragger Data
#' Filtering MSFragger Data
#'
#' Filtering MSFragger data. In this implementation, the peptide-level filter
#' optimizes both ppm and one of Expectation or PeptideProphet Probability
#' thresholds to achieve maximum number of peptide identifications within a
#' given FDR constraint.
#'
#' Filtering msfragger data. In this implementation, the peptide-level filter
#' optimizes both ppm and one of Expectation or PeptideProphet Probability thresholds
#' to achieve maximum number of peptide identifications within a given FDR constraint.
#' The accession-level filter optimizes based on `peptides_per_1000aa`, so
#' \code{\link{compute_num_peptides_per_1000aa}} must be used first.
#'
#' @md
#'
#' @param msnid (MSnID object) collated msfragger output
#' @param msnid (MSnID object) collated MSFragger output
#' @param fdr.max (numeric) Maximum acceptable FDR. Default is 0.01 (1%).
#' @param level (character) Level at which to perform FDR filter. The name of a
#' column in `psms(msnid)`. Currently, only `"peptide"` or `"accession"` are
#' supported. The added level `SiteID` makes sense only for PTM data and
#' supported. The added level `"SiteID"` makes sense only for PTM data and
#' first requires mapping of the modification site using
#' `MSnID::map_mod_sites`.
#' @param filtering_criterion (character) One of "evalue" which is
#' expectation value or "pp_prob" - peptide prophet probability. Default is
#' "pp_prob".
#' `MSnID::map_mod_sites`.
#' @param filtering_criterion (character) One of `"evalue"` (expectation value)
#' or `"pp_prob"` (PeptideProphet probability). Default is `"pp_prob"`.
#' @param n.iter.grid (numeric) number of grid-distributed evaluation points.
#' @param n.iter.nm (numeric) number of iterations for Nelder-Mead optimization
#' algorithm.
#' @param ... arguments passed to `filter_msfragger_data`.
#'
#' @return (MSnID object) filtered msfragger output
#' @return (MSnID object) filtered MSFragger output
#'
#' @seealso
#' \code{\link[MSnID]{MSnIDFilter}}
#' \code{\link[MSnID]{optimize_filter}}
#' \code{\link[MSnID]{apply_filter}}
#' @seealso \code{\link[MSnID]{MSnIDFilter}}
#' \code{\link[MSnID]{optimize_filter}} \code{\link[MSnID]{apply_filter}}
#'
#' @importFrom MSnID MSnIDFilter optimize_filter
#' mass_measurement_error apply_filter
#' @importFrom MSnID MSnIDFilter optimize_filter mass_measurement_error
#' apply_filter


#' @export
filter_msfragger_data <- function(msnid,
level,
filtering_criterion = c("pp_prob","evalue"),
fdr.max=0.01,
n.iter.grid=500,
n.iter.nm=100){
level,
filtering_criterion = c("pp_prob", "evalue"),
fdr.max=0.01,
n.iter.grid=500,
n.iter.nm=100){

# Clean up on exit
on.exit(rm(list = ls()))
Expand All @@ -49,18 +47,18 @@ filter_msfragger_data <- function(msnid,
# Check input
level <- match.arg(level, choices = c("peptide", "accession", "SiteID"))
filtering_criterion <- match.arg(filtering_criterion)

if(level == "SiteID" & !("SiteID" %in% names(msnid)))
stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")

if (level == "SiteID" & !("SiteID" %in% names(msnid))) {
stop("Column 'SiteID' is not in the MSnID object. Please map the PTMs first.")
}

keep_cols <- c(level, "isDecoy") # columns to calculate FDR

# Create MSnID of minimum size
suppressMessages(msnid_small <- MSnID())

# Setup
if (level == "accession") {

# Add filter criteria column
keep_cols <- c(keep_cols, "peptides_per_1000aa")
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])
Expand All @@ -69,36 +67,41 @@ filter_msfragger_data <- function(msnid,
filtObj <- MSnIDFilter(msnid_small)
filtObj$peptides_per_1000aa <- list(comparison = ">", threshold = 1)
method <- "SANN"

} else {
#Choose filter object probability value
if (filtering_criterion == "evalue") {
msnid$msmsScore <- -log10(msnid$Expectation)
}
if (filtering_criterion == "pp_prob") {
msnid$msmsScore <- msnid$`PeptideProphet Probability`
}
# Create columns for peptide filtering
# Can not use data.table syntax if the msnid has been modified at all,
# as it results in the "Invalid .internal.selfref" warning and
# columns not being created.
msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))

# Add filter criteria columns
keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])

# Create filter object
filtObj <- MSnIDFilter(msnid_small)
filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)
if (filtering_criterion == "evalue") {
filtObj$msmsScore <- list(comparison = ">", threshold = 2)
}
if (filtering_criterion == "pp_prob") {
filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
}
method <- "Nelder-Mead"
#Choose filter object probability value
if (filtering_criterion == "evalue") {
msnid$msmsScore <- -log10(msnid$Expectation)
}

if (filtering_criterion == "pp_prob") {
msnid$msmsScore <- msnid$`PeptideProphet Probability`
}

# Create columns for peptide filtering
# Cannot use data.table syntax if the msnid has been modified at all,
# as it results in the "Invalid .internal.selfref" warning and
# columns not being created.
msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid))

# Add filter criteria columns
keep_cols <- c(keep_cols, "msmsScore", "absParentMassErrorPPM")
msnid_small@psms <- unique(msnid@psms[, keep_cols, with = FALSE])

# Create filter object
filtObj <- MSnIDFilter(msnid_small)
filtObj$absParentMassErrorPPM <- list(comparison = "<", threshold = 10)

if (filtering_criterion == "evalue") {
filtObj$msmsScore <- list(comparison = ">", threshold = 2)
}

if (filtering_criterion == "pp_prob") {
filtObj$msmsScore <- list(comparison = ">", threshold = 0.99)
}

method <- "Nelder-Mead"
}


# step 1
filtObj.grid <- optimize_filter(filtObj,
Expand All @@ -107,12 +110,17 @@ filter_msfragger_data <- function(msnid,
method="Grid",
level=level,
n.iter=n.iter.grid)

# step 2
filtObj.nm <- optimize_filter(filtObj.grid,
msnid_small,
fdr.max=fdr.max,
method=method,
level=level,
n.iter=n.iter.nm)
return(apply_filter(msnid, filtObj.nm))

msnid <- apply_filter(msnid, filtObj.nm)

return(msnid)
}

Loading

0 comments on commit c6ab690

Please sign in to comment.