Skip to content

Commit

Permalink
Add mokapot integration; write rescoring output to TSV; minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Aug 7, 2023
1 parent 3d45fee commit 25f36c8
Show file tree
Hide file tree
Showing 14 changed files with 373 additions and 183 deletions.
7 changes: 5 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ old_files/
prepare_pin_files.py
*.jar

# Ruff
.ruff_cache/

# Atom remote-sync config
.remote-sync.json

Expand Down Expand Up @@ -90,6 +93,7 @@ celerybeat-schedule
# virtualenv
venv/
ENV/
.venv*/

# Spyder project settings
.spyderproject
Expand All @@ -99,5 +103,4 @@ ENV/

# vscode
.vscode/
.pytest_cache/v/cache/nodeids
.pytest_cache/v/cache/stepwise
.pytest_cache/
5 changes: 3 additions & 2 deletions docs/source/userguide/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ denotes the official Unimod name.

To correctly parse the various notations to ProForma, :py:mod:`psm_utils.io` readers
require :py:obj:`modification_definitions` that map each specific search engine
modification label to a valid ProForma label. :py:obj:`modification_definitions` is defined as a :py:obj:`list` of :py:obj:`dict`'s.
Each :py:obj:`dict` should contain the following key-value pairs:
modification label to a valid ProForma label. :py:obj:`modification_definitions` is defined as a
:py:obj:`list` of :py:obj:`dict`'s. Each :py:obj:`dict` should contain the following key-value
pairs:

- ``site``: Amino acids or peptide termini where the modification occurs. Should be
the IUPAC one-letter code for amino acid residues and `N-term` or `C-term` for
Expand Down
2 changes: 1 addition & 1 deletion examples/readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ Raw file converted to MGF with the Compomics ThermoRawFileParser (https://github

Searched with MaxQuant v1.6.2.3 with all parameters as described in the dataset's original article, with FDR-filtering set to 1 (no filtering)
Also searched with MS-GF+ v2019.02.28
MS²ReScore tested with Percolator version 3.02.1
MS²Rescore tested with Percolator version 3.02.1

Fasta file downloaded from https://www.uniprot.org/proteomes/UP000005640 and https://www.thegpm.org/crap/
22 changes: 13 additions & 9 deletions ms2rescore/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
CONSOLE = Console(record=True)


def _build_credits():
"""Build credits."""
def _print_credits():
"""Print software credits to terminal."""
text = Text()
text.append("\n")
text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore")
Expand All @@ -45,8 +45,7 @@ def _build_credits():
)
text.append("\n")
text.stylize("cyan")
return text

CONSOLE.print(text)

def _setup_logging(passed_level: str, log_file: Union[str, Path]):
"""Setup logging for writing to log file and Rich Console."""
Expand Down Expand Up @@ -146,18 +145,23 @@ def _parse_arguments() -> argparse.Namespace:

def main():
"""Run MS²Rescore command-line interface."""
CONSOLE.print(_build_credits())
_print_credits()

cli_args = _parse_arguments()
if cli_args.config_file:
config = parse_configurations([cli_args.config_file, cli_args])
else:
config = parse_configurations(cli_args)

output_file_root = Path(config["ms2rescore"]["psm_file"]).with_suffix("")
_setup_logging(
config["ms2rescore"]["log_level"], str(output_file_root) + "-ms2rescore-log.txt"
)
if config["ms2rescore"]["output_path"]:
output_file_root = (
Path(config["ms2rescore"]["output_path"])
/ Path(config["ms2rescore"]["psm_file"]).stem
).as_posix()
else:
output_file_root = Path(config["ms2rescore"]["psm_file"]).with_suffix("").as_posix()

_setup_logging(config["ms2rescore"]["log_level"], output_file_root + "-ms2rescore-log.txt")

try:
ms2rescore = MS2Rescore(configuration=config)
Expand Down
3 changes: 1 addition & 2 deletions ms2rescore/feature_generators/ms2pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import warnings
from itertools import chain
from typing import List, Optional, Union
import math

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -304,7 +303,7 @@ def _calculate_features_single(self, processing_result: ProcessingResult) -> Uni
features = dict(
zip(
self.feature_names,
[0.0 if math.isnan(ft) else ft for ft in feature_values],
[0.0 if np.isnan(ft) else ft for ft in feature_values],
)
)

Expand Down
5 changes: 3 additions & 2 deletions ms2rescore/gui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def __init__(self, *args, **kwargs):
self.configure(fg_color="transparent")
self.grid_columnconfigure(0, weight=1)

self.title = ctk.CTkLabel(self, text="MS²PIP", fg_color="gray30", corner_radius=6)
self.title = widgets.Heading(self, text="MS²PIP")
self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew")

self.enabled = widgets.LabeledSwitch(self, label="Enable MS²PIP", default=True)
Expand Down Expand Up @@ -393,7 +393,7 @@ def __init__(self, *args, **kwargs):
self.configure(fg_color="transparent")
self.grid_columnconfigure(0, weight=1)

self.title = ctk.CTkLabel(self, text="DeepLC", fg_color="gray30", corner_radius=6)
self.title = widgets.Heading(self, text="DeepLC")
self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew")

self.enabled = widgets.LabeledSwitch(self, label="Enable DeepLC", default=True)
Expand Down Expand Up @@ -461,6 +461,7 @@ def function(config):


def app():
"""Start the application."""
root = Function2CTk(
sidebar_frame=SideBar,
config_frame=ConfigFrame,
Expand Down
8 changes: 4 additions & 4 deletions ms2rescore/gui/function2ctk.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,10 +230,10 @@ def __init__(
self.stop_callback = stop_callback
self.stop_button_pressed = False

self.grid_columnconfigure(0, weight=2)
self.grid_columnconfigure(1, weight=1)
self.grid_columnconfigure(0, weight=1)
self.grid_columnconfigure(1, minsize=140)

self.progress_bar = ctk.CTkProgressBar(self) # , width=150)
self.progress_bar = ctk.CTkProgressBar(self)

self.start_button = ctk.CTkButton(master=self, command=self._start_callback, text="Start")
self.stop_button = ctk.CTkButton(master=self, command=self._stop_callback, text="Stop")
Expand All @@ -259,7 +259,7 @@ def _start_callback(self):
self.stop_button.grid(row=0, column=1, sticky="ew")

# Show and activate progress bar
self.progress_bar.grid(row=0, column=0, sticky="ew")
self.progress_bar.grid(row=0, column=0, sticky="ew", padx=10)
self.progress_bar.configure(mode="indeterminate")
self.progress_bar.start()

Expand Down
24 changes: 19 additions & 5 deletions ms2rescore/gui/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
import customtkinter as ctk


class Heading(ctk.CTkLabel):
    """Label pre-styled as a section heading (filled background, rounded corners).

    Accepts the same positional and keyword arguments as ``ctk.CTkLabel``; they
    are forwarded unchanged to the parent constructor. The heading style is
    applied afterwards through ``configure``, i.e. after any ``fg_color``,
    ``text_color`` or ``corner_radius`` passed by the caller has been handled
    by the parent constructor.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Two-element color tuples: presumably (light mode, dark mode) values,
        # following the customtkinter convention -- confirm against its docs.
        heading_style = {
            "fg_color": ("gray80", "gray30"),
            "text_color": ("black", "white"),
            "corner_radius": 6,
        }
        self.configure(**heading_style)


class LabeledEntry(ctk.CTkFrame):
def __init__(self, *args, label="Enter text", placeholder_text="Enter text...", **kwargs):
super().__init__(*args, **kwargs)
Expand Down Expand Up @@ -58,10 +68,8 @@ def __init__(self, *args, label="Select option", options=[], default_value=None,

self._radio_buttons = []
for i, option in enumerate(options):
radio_button = ctk.CTkRadioButton(
self, text=option, variable=self.value, value=option
)
radio_button.grid(row=i+1, column=0, padx=0, pady=(0, 5), sticky="w")
radio_button = ctk.CTkRadioButton(self, text=option, variable=self.value, value=option)
radio_button.grid(row=i + 1, column=0, padx=0, pady=(0, 5), sticky="w")
self._radio_buttons.append(radio_button)

def get(self):
Expand Down Expand Up @@ -316,7 +324,13 @@ def __init__(self, *args, label=None, columns=2, header_labels=["A", "B"], **kwa
for i, header in enumerate(self.header_labels):
header_row.grid_columnconfigure(i, weight=1, uniform=self.uniform_hash)
padx = (0, 5) if i < len(self.header_labels) - 1 else (0, 0)
label = ctk.CTkLabel(header_row, text=header, fg_color="gray30", corner_radius=6)
label = ctk.CTkLabel(
header_row,
text=header,
fg_color=("gray80", "gray30"),
text_color=("black", "white"),
corner_radius=6,
)
label.grid(row=0, column=i, padx=padx, sticky="ew")

# Input rows
Expand Down
34 changes: 32 additions & 2 deletions ms2rescore/ms2rescore_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ms2rescore.exceptions import MS2RescoreConfigurationError, MS2RescoreError
from ms2rescore.feature_generators import FEATURE_GENERATORS
from ms2rescore.rescoring_engines import percolator
from ms2rescore.rescoring_engines import mokapot, percolator

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -86,6 +86,11 @@ def run(self):
# Ensure that spectrum IDs are strings (Pydantic 2.0 does not coerce int to str)
psm_list["spectrum_id"] = [str(spec_id) for spec_id in psm_list["spectrum_id"]]

# Store values for comparison later
psm_data_before = psm_list.to_dataframe()[
["score", "qvalue", "pep", "is_decoy", "rank"]
].copy()

# Add rescoring features
feature_names = dict()
for fgen_name, fgen_config in self.config["feature_generators"].items():
Expand Down Expand Up @@ -114,6 +119,7 @@ def run(self):
logging.debug(f"Creating USIs for {len(psm_list)} PSMs")
psm_list["spectrum_id"] = [psm.get_usi(as_url=False) for psm in psm_list]

# Rescore PSMs
if "percolator" in self.config["rescoring_engine"]:
percolator.rescore(
psm_list,
Expand All @@ -124,7 +130,31 @@ def run(self):
)

elif "mokapot" in self.config["rescoring_engine"]:
raise NotImplementedError()
mokapot.rescore(psm_list, mokapot_kwargs=self.config["rescoring_engine"]["mokapot"])

psm_data_after = psm_list.to_dataframe()[
["score", "qvalue", "pep", "is_decoy", "rank"]
].copy()

# Compare results
id_psms_before = (
(psm_data_before["qvalue"] < 0.01) & (psm_data_before["is_decoy"] == False)
).sum()
id_psms_after = (
(psm_data_after["qvalue"] < 0.01) & (psm_data_after["is_decoy"] == False)
).sum()
diff = id_psms_after - id_psms_before
diff_perc = diff / id_psms_before
logger.info(f"Identified {diff} ({diff_perc:.2%}) more PSMs at 1% FDR after rescoring.")

# Write output
logger.info(f"Writing output to {self.output_file_root}.psms.tsv...")
psm_utils.io.write_file(
psm_list,
self.output_file_root + ".psms.tsv",
filetype="tsv",
show_progressbar=True,
)


def _match_psm_ids(old_id, regex_pattern):
Expand Down
Loading

0 comments on commit 25f36c8

Please sign in to comment.