From 8d26b1232446c22be4b5321f4e13b0f63e28fd62 Mon Sep 17 00:00:00 2001 From: andrew Date: Fri, 24 Aug 2018 21:22:55 -0500 Subject: [PATCH] Implement param and mod files for MS-GF. User-defined PTMs! Params previously hardcoded in runMS2.sh. The param file is easier and safer to modify. --- params/MSGFplus_mods.txt | 49 ++++++++++++++++++++++++++++++++++++++ params/MSGFplus_params.txt | 32 +++++++++++++++++++++++++ runMS2.sh | 24 ++++++++++++++++++- 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 params/MSGFplus_mods.txt create mode 100644 params/MSGFplus_params.txt diff --git a/params/MSGFplus_mods.txt b/params/MSGFplus_mods.txt new file mode 100644 index 0000000..4fd5a37 --- /dev/null +++ b/params/MSGFplus_mods.txt @@ -0,0 +1,49 @@ +# This file is used to specify modifications for MS-GF+. + +# It is copied and slightly modified from the MS-GF+ example modification file found here: +# https://bix-lab.ucsd.edu/download/attachments/13533355/Mods.txt + +# Max Number of Modifications per peptide +# If this value is large, the search takes a long time. +NumMods=3 + +# To input a modification, use the following command: +# Mass or CompositionStr, Residues, ModType, Position, Name (all five fields are required). +# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num]Br[Num]Cl[Num]Fe[Num]) +# - C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur), P (Phosphorus), Br (Bromine), Cl (Chlorine), Fe (Iron), and Se (Selenium) are allowed. +# - Negative numbers are allowed. +# - E.g. C2H2O1 (valid), H2C1O1 (invalid) +# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient). +# - E.g. 15.994915 +# Residues: affected amino acids (must be uppercase letters) +# - Must be uppercase letters or * +# - Use * if this modification is applicable to any residue. +# - * should not be used for an "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) +# - E.g. 
NQ, * +# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive) +# Position: position in the peptide where the modification can be attached. +# - One of the following five values should be used: +# - any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) +# - Case insensitive +# - "-" can be omitted +# - E.g. any, Any, Prot-n-Term, ProtNTerm => all valid +# Name: name of the modification (Unimod PSI-MS name) +# - For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name +# - E.g. Phospho, Acetyl +# - Visit http://www.unimod.org to get PSI-MS names. + +57.021464,C,fix,any,Carbamidomethyl # Fixed Carbamidomethyl C +#144.102063,*,fix,N-term,iTRAQ4plex # iTRAQ 4 plex +#144.102063,K,fix,any,iTRAQ4plex # iTRAQ 4 plex + +# Variable Modifications (default: none) +15.9949,M,opt,any,Oxidation # Oxidation M +#O1,M,opt,any,Oxidation # Oxidation M (CompositionStr can be used instead of mass) +#H-1N-1O1,NQ,opt,any,Deamidated # Negative numbers are allowed. +#C2H3NO,*,opt,N-term,Carbamidomethyl # Variable Carbamidomethyl N-term +#H-2O-1,E,opt,N-term,Glu->pyro-Glu # Pyro-glu from E +#H-3N-1,Q,opt,N-term,Gln->pyro-Glu # Pyro-glu from Q +#C2H2O,*,opt,Prot-N-term,Acetyl # Acetylation Protein N-term +#C2H2O1,K,opt,any,Acetyl # Acetylation K +#CH2,K,opt,any,Methyl # Methylation K +#HO3P,STY,opt,any,Phospho # Phosphorylation STY diff --git a/params/MSGFplus_params.txt b/params/MSGFplus_params.txt new file mode 100644 index 0000000..4099c18 --- /dev/null +++ b/params/MSGFplus_params.txt @@ -0,0 +1,32 @@ +# MS-GF+ parameters +# Documentation: https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 + +# Comments are only allowed on lines starting with the # symbol. +# Make sure there is one space between parameter flag and value. +# No spaces are allowed within a value. + +# Precursor mass tolerance. This can be asymmetric, e.g. "-t 10ppm,4Da". 
+-t 10ppm + +# Number of enzyme-specific termini. +-ntt 2 + +# Enzyme ID. 0:nonspecific, 1:Trypsin(default), 2:Chymotrypsin, 3:Lys-C, +# 4:Lys-N, 5:Glu-C, 6: Arg-C, 7:Asp-N, 8:alphaLP, 9:no cleavage +-e 1 + +# Instrument ID. 0:Low-res MS/MS, 1:High-res, 2:TOF (10ppm MS/MS), 3:Q-Exactive HCD +-inst 0 + +# Minimum precursor charge, default 2 +# -minCharge 2 + +# Maximum precursor charge, default 3 +-maxCharge 6 + +# Minimum peptide length, default 6 +# -minLength 6 + +# Maximum peptide length, default 40 +# -maxLength 40 + diff --git a/runMS2.sh b/runMS2.sh index bf4bf9b..07b146d 100755 --- a/runMS2.sh +++ b/runMS2.sh @@ -129,6 +129,28 @@ if [ ! -f "$MSGFplus_JAR" ] echo "msgfplus jar $MSGFplus_JAR not found!" exit 0 fi + +### Build the MS-GF+ argument array: one element per command-line word, so every flag and value reaches java as its own argument. +MSGFplus_MODFILE="${PIPELINEDIR}/params/MSGFplus_mods.txt" +if [ ! -f "$MSGFplus_MODFILE" ] +then + echo "MSGF+ modification file $MSGFplus_MODFILE not found! Using default of static C+57 and no optional modifications." + MSGFplus_MODPARAM=() +else + MSGFplus_MODPARAM=(-mod "$MSGFplus_MODFILE") +fi + +# Read the MS-GF+ param file, dropping every line that contains '#'; split the rest on whitespace, then append the modification file option. +MSGFplus_PARAMFILE="${PIPELINEDIR}/params/MSGFplus_params.txt" +if [ ! -f "$MSGFplus_PARAMFILE" ] +then + echo "MSGF+ parameter file $MSGFplus_PARAMFILE not found!" + exit 0 +else + read -r -a MSGFplus_PARAMS <<< "$(grep -v "#" "$MSGFplus_PARAMFILE" | tr -s '\n\r' ' ')" + MSGFplus_PARAMS+=("${MSGFplus_MODPARAM[@]}") +fi + # ##### #Researved for MS1 future analysis @@ -202,7 +224,7 @@ mkdir -p $TANDEMDIR #mv $TANDEMDIR/*tandemK.xml $WORKDIR #MSGF+ TBL=${MSGFOUT/.mzid}.tsv - time java -Xmx20000M -jar $MSGFplus_JAR -d $FASTAFILE -s $1 -o ${MSGFOUT}.mzid -t 10ppm -tda 0 -ntt 2 -e 1 -inst 3 -maxCharge 6 + time java -Xmx20000M -jar $MSGFplus_JAR -d $FASTAFILE -s $1 -o ${MSGFOUT}.mzid -tda 0 "${MSGFplus_PARAMS[@]}" time java -Xmx20000M -cp $MSGFplus_JAR edu.ucsd.msjava.ui.MzIDToTsv -i ${MSGFOUT}.mzid -o $TBL -showQValue 1 -showDecoy 1 -unroll 0