Skip to content

Commit

Permalink
Implement param and mod files for MS-GF. User-defined PTMs!
Browse files Browse the repository at this point in the history
Params previously hardcoded in runMS2.sh. The param file is easier and safer to modify.
  • Loading branch information
aphorton committed Aug 25, 2018
1 parent 2396218 commit 8d26b12
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 1 deletion.
49 changes: 49 additions & 0 deletions params/MSGFplus_mods.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# This file is used to specify modifications for MS-GF+.

# It is copied and slightly modified from the MS-GF+ example modification file found here:
# https://bix-lab.ucsd.edu/download/attachments/13533355/Mods.txt

# Max Number of Modifications per peptide
# If this value is large, the search takes long.
NumMods=3

# To input a modification, use the following command:
# Mass or CompositionStr, Residues, ModType, Position, Name (all five fields are required).
# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num]Br[Num]Cl[Num]Fe[Num])
# - C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur), P (Phosphorus), Br (Bromine), Cl (Chlorine), Fe (Iron), and Se (Selenium) are allowed.
# - Negative numbers are allowed.
# - E.g. C2H2O1 (valid), H2C1O1 (invalid)
# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient).
# - E.g. 15.994915
# Residues: affected amino acids (must be uppercase letters)
# - Must be uppercase letters or *
# - Use * if this modification is applicable to any residue.
# - * cannot be combined with the "any" position (e.g. "15.994915, *, opt, any, Oxidation" is not allowed).
# - E.g. NQ, *
# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive)
# Position: position in the peptide where the modification can be attached.
# - One of the following five values should be used:
# - any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term)
# - Case insensitive
# - "-" can be omitted
# - E.g. any, Any, Prot-n-Term, ProtNTerm => all valid
# Name: name of the modification (Unimod PSI-MS name)
# - For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name
# - E.g. Phospho, Acetyl
# - Visit http://www.unimod.org to get PSI-MS names.

57.021464,C,fix,any,Carbamidomethyl # Fixed Carbamidomethyl C
#144.102063,*,fix,N-term,iTRAQ4plex # iTRAQ 4 plex
#144.102063,K,fix,any,iTRAQ4plex # iTRAQ 4 plex

# Variable Modifications (default: none)
15.9949,M,opt,any,Oxidation # Oxidation M
#O1,M,opt,any,Oxidation # Oxidation M (CompositionStr can be used instead of mass)
#H-1N-1O1,NQ,opt,any,Deamidated # Negative numbers are allowed.
#C2H3NO,*,opt,N-term,Carbamidomethyl # Variable Carbamidomethyl N-term
#H-2O-1,E,opt,N-term,Glu->pyro-Glu # Pyro-glu from E
#H-3N-1,Q,opt,N-term,Gln->pyro-Glu # Pyro-glu from Q
#C2H2O,*,opt,Prot-N-term,Acetyl # Acetylation Protein N-term
#C2H2O1,K,opt,any,Acetyl # Acetylation K
#CH2,K,opt,any,Methyl # Methylation K
#HO3P,STY,opt,any,Phospho # Phosphorylation STY
32 changes: 32 additions & 0 deletions params/MSGFplus_params.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# MS-GF+ parameters
# Documentation: https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355

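# Comments are only allowed on their own lines, starting with the # symbol.
# runMS2.sh discards every line that contains a # anywhere, so never put a
# trailing comment on a parameter line (the parameter would be dropped too).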
# Make sure there is one space between parameter flag and value.
# No spaces are allowed within a value.

# Precursor mass tolerance. This can be asymmetric, e.g. "-t 10ppm,4Da".
-t 10ppm

# Number of enzyme-specific termini.
-ntt 2

# Enzyme ID. 0:nonspecific, 1:Trypsin(default), 2:Chymotrypsin, 3:Lys-C,
# 4:Lys-N, 5:Glu-C, 6: Arg-C, 7:Asp-N, 8:alphaLP, 9:no cleavage
-e 1

# Instrument ID. 0:Low-res MS/MS, 1:High-res, 2:TOF (10ppm MS/MS), 3:Q-Exactive HCD
-inst 0

# Minimum precursor charge, default 2
# -minCharge 2

# Maximum precursor charge, default 3
-maxCharge 6

# Minimum peptide length, default 6
# -minLength 6

# Maximum peptide length, default 40
# -maxLength 40

24 changes: 23 additions & 1 deletion runMS2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,28 @@ if [ ! -f "$MSGFplus_JAR" ]
echo "msgfplus jar $MSGFplus_JAR not found!"
exit 0
fi

### Assemble the option string that is later handed to MS-GF+.
#
# Result: MSGFplus_PARAMSTR = <options read from the parameter file>
#                             followed by MSGFplus_MODPARAM.

# Optional modification file. When it exists it contributes
#   ' -mod "<path>"'
# (the double quotes are literal characters stored inside the string);
# when it is missing nothing is added and only a notice is printed.
# NOTE(review): "MSFGplus" has the letters F and G transposed relative to the
# other MSGFplus_* names; spelling kept because it is used consistently here.
MSFGplus_MODFILE="${PIPELINEDIR}/params/MSGFplus_mods.txt"
MSGFplus_MODPARAM=""
if [ -f "$MSFGplus_MODFILE" ]; then
  MSGFplus_MODPARAM=" -mod \"$MSFGplus_MODFILE\""
else
  echo "MSGF+ modification file $MSFGplus_MODFILE not found! Using default of static C+57 and no optional modifications."
fi

# Mandatory parameter file: without it the script stops here.
# NOTE(review): this error path exits with status 0, same as the jar check
# above — confirm that callers really expect a "success" status.
MSGFplus_PARAMFILE="${PIPELINEDIR}/params/MSGFplus_params.txt"
if [ ! -f "$MSGFplus_PARAMFILE" ]; then
  echo "MSGF+ parameter file $MSGFplus_PARAMFILE not found!"
  exit 0
fi

# Drop every line that contains a '#' (comments), then squeeze line breaks
# and carriage returns into single spaces so the options form one line.
MSGFplus_PARAMSTR=$(grep -v "#" "$MSGFplus_PARAMFILE" | tr -s '\n\r' ' ')
# Append the (possibly empty) modification-file option.
MSGFplus_PARAMSTR="${MSGFplus_PARAMSTR}${MSGFplus_MODPARAM}"


#
#####
#Reserved for future MS1 analysis
Expand Down Expand Up @@ -202,7 +224,7 @@ mkdir -p $TANDEMDIR
#mv $TANDEMDIR/*tandemK.xml $WORKDIR
#MSGF+
#######################################
# Split an MS-GF+ option string into separate command-line arguments.
# Whitespace separates arguments. A double-quoted section stays inside one
# argument (the quotes themselves are removed), so a quoted path may contain
# spaces. Single quotes and backslashes receive no special treatment.
# Globals:   MSGFplus_ARGS (written) - array that receives the arguments
# Arguments: $1 - option string, e.g. '-t 10ppm -mod "/some dir/mods.txt"'
# Returns:   0 always
#######################################
msgfplus_split_args() {
  local text=$1 word='' ch
  local -i pos in_quotes=0 pending=0
  MSGFplus_ARGS=()
  for (( pos = 0; pos < ${#text}; pos++ )); do
    ch=${text:pos:1}
    if [[ "$ch" == '"' ]]; then
      # Toggle quoted mode; the quote character itself is not kept.
      in_quotes=$(( 1 - in_quotes ))
      pending=1
    elif (( in_quotes == 0 )) && [[ "$ch" == [[:space:]] ]]; then
      # Unquoted whitespace ends the current argument (runs are collapsed).
      if (( pending )); then
        MSGFplus_ARGS+=("$word")
        word=''
        pending=0
      fi
    else
      word+=$ch
      pending=1
    fi
  done
  if (( pending )); then
    MSGFplus_ARGS+=("$word")
  fi
  return 0
}

# TSV report path: MSGFOUT with the first ".mzid" removed, plus ".tsv".
TBL=${MSGFOUT/.mzid}.tsv

# MSGFplus_PARAMSTR holds all user options in ONE string. Passing it quoted
# would give java a single giant argument, and passing it unquoted would keep
# the literal quote characters around the -mod path, so split it properly.
msgfplus_split_args "$MSGFplus_PARAMSTR"

# Database search. All search options now come from the parameter file.
# NOTE(review): the superseded hard-coded call used "-inst 3", while
# params/MSGFplus_params.txt sets "-inst 0" — confirm that is intended.
time java -Xmx20000M -jar "$MSGFplus_JAR" -d "$FASTAFILE" -s "$1" -o "${MSGFOUT}.mzid" -tda 0 "${MSGFplus_ARGS[@]}"
# Convert the mzIdentML result into the TSV report.
time java -Xmx20000M -cp "$MSGFplus_JAR" edu.ucsd.msjava.ui.MzIDToTsv -i "${MSGFOUT}.mzid" -o "$TBL" -showQValue 1 -showDecoy 1 -unroll 0


Expand Down

0 comments on commit 8d26b12

Please sign in to comment.