diff --git a/.gitignore b/.gitignore index 57623e22..8eaababa 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ old_files/ prepare_pin_files.py *.jar +# Ruff +.ruff_cache/ + # Atom remote-sync config .remote-sync.json @@ -90,6 +93,7 @@ celerybeat-schedule # virtualenv venv/ ENV/ +.venv*/ # Spyder project settings .spyderproject @@ -99,5 +103,4 @@ ENV/ # vscode .vscode/ -.pytest_cache/v/cache/nodeids -.pytest_cache/v/cache/stepwise +.pytest_cache/ diff --git a/docs/source/userguide/configuration.rst b/docs/source/userguide/configuration.rst index 081a9b68..a857bf43 100644 --- a/docs/source/userguide/configuration.rst +++ b/docs/source/userguide/configuration.rst @@ -26,8 +26,9 @@ denotes the official Unimod name. To correctly parse the various notations to ProForma, :py:mod:`psm_utils.io` readers require :py:obj:`modification_definitions` that map each specific search engine -modification label to a valid ProForma label. :py:obj:`modification_definitions` is defined as a :py:obj:`list` of :py:obj:`dict`'s. -Each :py:obj:`dict` should contain the following key-value pairs: +modification label to a valid ProForma label. :py:obj:`modification_definitions` is defined as a +:py:obj:`list` of :py:obj:`dict`'s. Each :py:obj:`dict` should contain the following key-value +pairs: - ``site``: Amino acids or peptide termini where the modification occurs. Should be the IUPAC one-letter code for amino acid residues and `N-term` or `C-term` for diff --git a/examples/readme.txt b/examples/readme.txt index bafaebbb..59c391af 100644 --- a/examples/readme.txt +++ b/examples/readme.txt @@ -4,6 +4,6 @@ Raw file converted to MGF with the Compomics ThermoRawFileParser (https://github Searched with MaxQuant v1.6.2.3 with all parameters as described in the dataset's original article, with FDR-filtering set to 1 (no filtering) Also searched with MS-GF+ v2019.02.28 -MS²ReScore tested with Percolator version 3.02.1 +MS²Rescore tested with Percolator version 3.02.1 Fasta file downloaded from https://www.uniprot.org/proteomes/UP000005640 and https://www.thegpm.org/crap/ diff --git a/ms2rescore/__main__.py b/ms2rescore/__main__.py index 33ddb6df..46d29ce0 100644 --- a/ms2rescore/__main__.py +++ b/ms2rescore/__main__.py @@ -32,8 +32,8 @@ CONSOLE = Console(record=True) -def _build_credits(): - """Build credits.""" +def _print_credits(): + """Print software credits to terminal.""" text = Text() text.append("\n") text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore") @@ -45,8 +45,7 @@ def _build_credits(): ) text.append("\n") text.stylize("cyan") - return text - + CONSOLE.print(text) def _setup_logging(passed_level: str, log_file: Union[str, Path]): """Setup logging for writing to log file and Rich Console.""" @@ -146,7 +145,7 @@ def _parse_arguments() -> argparse.Namespace: def main(): """Run MS²Rescore command-line interface.""" - CONSOLE.print(_build_credits()) + _print_credits() cli_args = _parse_arguments() if cli_args.config_file: @@ -154,10 +153,15 @@ def main(): else: config = parse_configurations(cli_args) - output_file_root = Path(config["ms2rescore"]["psm_file"]).with_suffix("") - _setup_logging( - config["ms2rescore"]["log_level"], str(output_file_root) + "-ms2rescore-log.txt" - ) + if config["ms2rescore"]["output_path"]: + output_file_root = ( + Path(config["ms2rescore"]["output_path"]) + / Path(config["ms2rescore"]["psm_file"]).stem + ).as_posix() + else: + output_file_root = Path(config["ms2rescore"]["psm_file"]).with_suffix("").as_posix() + + _setup_logging(config["ms2rescore"]["log_level"], output_file_root + "-ms2rescore-log.txt") try: ms2rescore = MS2Rescore(configuration=config) diff --git a/ms2rescore/feature_generators/ms2pip.py b/ms2rescore/feature_generators/ms2pip.py index 2f1ee6ad..613ebf79 100644 --- a/ms2rescore/feature_generators/ms2pip.py +++ b/ms2rescore/feature_generators/ms2pip.py @@ -5,7 +5,6 @@ import warnings from itertools import chain from typing import List, Optional, Union -import math import numpy as np import pandas as pd @@ -304,7 +303,7 @@ def _calculate_features_single(self, processing_result: ProcessingResult) -> Uni features = dict( zip( self.feature_names, - [0.0 if math.isnan(ft) else ft for ft in feature_values], + [0.0 if np.isnan(ft) else ft for ft in feature_values], ) ) diff --git a/ms2rescore/gui/app.py b/ms2rescore/gui/app.py index 45b8b518..8cae445d 100644 --- a/ms2rescore/gui/app.py +++ b/ms2rescore/gui/app.py @@ -356,7 +356,7 @@ def __init__(self, *args, **kwargs): self.configure(fg_color="transparent") self.grid_columnconfigure(0, weight=1) - self.title = ctk.CTkLabel(self, text="MS²PIP", fg_color="gray30", corner_radius=6) + self.title = widgets.Heading(self, text="MS²PIP") self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") self.enabled = widgets.LabeledSwitch(self, label="Enable MS²PIP", default=True) @@ -393,7 +393,7 @@ def __init__(self, *args, **kwargs): self.configure(fg_color="transparent") self.grid_columnconfigure(0, weight=1) - self.title = ctk.CTkLabel(self, text="DeepLC", fg_color="gray30", corner_radius=6) + self.title = widgets.Heading(self, text="DeepLC") self.title.grid(row=0, column=0, columnspan=2, pady=(0, 5), sticky="ew") self.enabled = widgets.LabeledSwitch(self, label="Enable DeepLC", default=True) @@ -461,6 +461,7 @@ def function(config): def app(): + """Start the application.""" root = Function2CTk( sidebar_frame=SideBar, config_frame=ConfigFrame, diff --git a/ms2rescore/gui/function2ctk.py b/ms2rescore/gui/function2ctk.py index a1616124..eb43902f 100644 --- a/ms2rescore/gui/function2ctk.py +++ b/ms2rescore/gui/function2ctk.py @@ -230,10 +230,10 @@ def __init__( self.stop_callback = stop_callback self.stop_button_pressed = False - self.grid_columnconfigure(0, weight=2) - self.grid_columnconfigure(1, weight=1) + self.grid_columnconfigure(0, weight=1) + self.grid_columnconfigure(1, minsize=140) - self.progress_bar = ctk.CTkProgressBar(self) # , width=150) + self.progress_bar = ctk.CTkProgressBar(self) self.start_button = ctk.CTkButton(master=self, command=self._start_callback, text="Start") self.stop_button = ctk.CTkButton(master=self, command=self._stop_callback, text="Stop") @@ -259,7 +259,7 @@ def _start_callback(self): self.stop_button.grid(row=0, column=1, sticky="ew") # Show and activate progress bar - self.progress_bar.grid(row=0, column=0, sticky="ew") + self.progress_bar.grid(row=0, column=0, sticky="ew", padx=10) self.progress_bar.configure(mode="indeterminate") self.progress_bar.start() diff --git a/ms2rescore/gui/widgets.py b/ms2rescore/gui/widgets.py index fc2618fc..d4b4a4f6 100644 --- a/ms2rescore/gui/widgets.py +++ b/ms2rescore/gui/widgets.py @@ -7,6 +7,16 @@ import customtkinter as ctk +class Heading(ctk.CTkLabel): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.configure( + fg_color=("gray80", "gray30"), + text_color=("black", "white"), + corner_radius=6, + ) + + class LabeledEntry(ctk.CTkFrame): def __init__(self, *args, label="Enter text", placeholder_text="Enter text...", **kwargs): super().__init__(*args, **kwargs) @@ -58,10 +68,8 @@ def __init__(self, *args, label="Select option", options=[], default_value=None, self._radio_buttons = [] for i, option in enumerate(options): - radio_button = ctk.CTkRadioButton( - self, text=option, variable=self.value, value=option - ) - radio_button.grid(row=i+1, column=0, padx=0, pady=(0, 5), sticky="w") + radio_button = ctk.CTkRadioButton(self, text=option, variable=self.value, value=option) + radio_button.grid(row=i + 1, column=0, padx=0, pady=(0, 5), sticky="w") self._radio_buttons.append(radio_button) def get(self): @@ -316,7 +324,13 @@ def __init__(self, *args, label=None, columns=2, header_labels=["A", "B"], **kwa for i, header in enumerate(self.header_labels): header_row.grid_columnconfigure(i, weight=1, uniform=self.uniform_hash) padx = (0, 5) if i < len(self.header_labels) - 1 else (0, 0) - label = ctk.CTkLabel(header_row, text=header, fg_color="gray30", corner_radius=6) + label = ctk.CTkLabel( + header_row, + text=header, + fg_color=("gray80", "gray30"), + text_color=("black", "white"), + corner_radius=6, + ) label.grid(row=0, column=i, padx=padx, sticky="ew") # Input rows diff --git a/ms2rescore/ms2rescore_main.py b/ms2rescore/ms2rescore_main.py index 140ee309..c81078bb 100644 --- a/ms2rescore/ms2rescore_main.py +++ b/ms2rescore/ms2rescore_main.py @@ -10,7 +10,7 @@ from ms2rescore.exceptions import MS2RescoreConfigurationError, MS2RescoreError from ms2rescore.feature_generators import FEATURE_GENERATORS -from ms2rescore.rescoring_engines import percolator +from ms2rescore.rescoring_engines import mokapot, percolator logger = logging.getLogger(__name__) @@ -86,6 +86,11 @@ def run(self): # Ensure that spectrum IDs are strings (Pydantic 2.0 does not coerce int to str) psm_list["spectrum_id"] = [str(spec_id) for spec_id in psm_list["spectrum_id"]] + # Store values for comparison later + psm_data_before = psm_list.to_dataframe()[ + ["score", "qvalue", "pep", "is_decoy", "rank"] + ].copy() + # Add rescoring features feature_names = dict() for fgen_name, fgen_config in self.config["feature_generators"].items(): @@ -114,6 +119,7 @@ def run(self): logging.debug(f"Creating USIs for {len(psm_list)} PSMs") psm_list["spectrum_id"] = [psm.get_usi(as_url=False) for psm in psm_list] + # Rescore PSMs if "percolator" in self.config["rescoring_engine"]: percolator.rescore( psm_list, @@ -124,7 +130,31 @@ def run(self): ) elif "mokapot" in self.config["rescoring_engine"]: - raise NotImplementedError() + mokapot.rescore(psm_list, mokapot_kwargs=self.config["rescoring_engine"]["mokapot"]) + + psm_data_after = psm_list.to_dataframe()[ + ["score", "qvalue", "pep", "is_decoy", "rank"] + ].copy() + + # Compare results + id_psms_before = ( + (psm_data_before["qvalue"] < 0.01) & (psm_data_before["is_decoy"] == False) + ).sum() + id_psms_after = ( + (psm_data_after["qvalue"] < 0.01) & (psm_data_after["is_decoy"] == False) + ).sum() + diff = id_psms_after - id_psms_before + diff_perc = diff / id_psms_before + logger.info(f"Identified {diff} ({diff_perc:.2%}) more PSMs at 1% FDR after rescoring.") + + # Write output + logger.info(f"Writing output to {self.output_file_root}.psms.tsv...") + psm_utils.io.write_file( + psm_list, + self.output_file_root + ".psms.tsv", + filetype="tsv", + show_progressbar=True, + ) def _match_psm_ids(old_id, regex_pattern): diff --git a/ms2rescore/package_data/ms2rescore-gui-theme.json b/ms2rescore/package_data/ms2rescore-gui-theme.json index f78766c2..92c89cf1 100644 --- a/ms2rescore/package_data/ms2rescore-gui-theme.json +++ b/ms2rescore/package_data/ms2rescore-gui-theme.json @@ -1,155 +1,155 @@ { - "CTk": { - "fg_color": ["gray95", "gray10"] - }, - "CTkToplevel": { - "fg_color": ["gray95", "gray10"] - }, - "CTkFrame": { - "corner_radius": 6, - "border_width": 0, - "fg_color": ["gray90", "gray13"], - "top_fg_color": ["gray85", "gray16"], - "border_color": ["gray65", "gray28"] - }, - "CTkButton": { - "corner_radius": 6, - "border_width": 0, - "fg_color": ["#3a7ebf", "#1f538d"], - "hover_color": ["#325882", "#14375e"], - "border_color": ["#3E454A", "#949A9F"], - "text_color": ["#DCE4EE", "#DCE4EE"], - "text_color_disabled": ["gray74", "gray60"] - }, - "CTkLabel": { - "corner_radius": 0, - "fg_color": "transparent", - "text_color": ["gray14", "gray84"] - }, - "CTkEntry": { - "corner_radius": 6, - "border_width": 2, - "fg_color": ["#F9F9FA", "#343638"], - "border_color": ["#979DA2", "#565B5E"], - "text_color": ["gray14", "gray84"], - "placeholder_text_color": ["gray52", "gray62"] - }, - "CTkCheckbox": { - "corner_radius": 6, - "border_width": 3, - "fg_color": ["#3a7ebf", "#1f538d"], - "border_color": ["#3E454A", "#949A9F"], - "hover_color": ["#325882", "#14375e"], - "checkmark_color": ["#DCE4EE", "gray90"], - "text_color": ["gray14", "gray84"], - "text_color_disabled": ["gray60", "gray45"] - }, - "CTkSwitch": { - "corner_radius": 1000, - "border_width": 3, - "button_length": 0, - "fg_color": ["#939BA2", "#4A4D50"], - "progress_color": ["#3a7ebf", "#1f538d"], - "button_color": ["gray36", "#D5D9DE"], - "button_hover_color": ["gray20", "gray100"], - "text_color": ["gray14", "gray84"], - "text_color_disabled": ["gray60", "gray45"] - }, - "CTkRadiobutton": { - "corner_radius": 1000, - "border_width_checked": 6, - "border_width_unchecked": 3, - "fg_color": ["#3a7ebf", "#1f538d"], - "border_color": ["#3E454A", "#949A9F"], - "hover_color": ["#325882", "#14375e"], - "text_color": ["gray14", "gray84"], - "text_color_disabled": ["gray60", "gray45"] - }, - "CTkProgressBar": { - "corner_radius": 1000, - "border_width": 0, - "fg_color": ["#939BA2", "#4A4D50"], - "progress_color": ["#3a7ebf", "#1f538d"], - "border_color": ["gray", "gray"] - }, - "CTkSlider": { - "corner_radius": 1000, - "button_corner_radius": 1000, - "border_width": 6, - "button_length": 0, - "fg_color": ["#939BA2", "#4A4D50"], - "progress_color": ["gray40", "#AAB0B5"], - "button_color": ["#3a7ebf", "#1f538d"], - "button_hover_color": ["#325882", "#14375e"] - }, - "CTkOptionMenu": { - "corner_radius": 6, - "fg_color": ["#3a7ebf", "#1f538d"], - "button_color": ["#325882", "#14375e"], - "button_hover_color": ["#234567", "#1e2c40"], - "text_color": ["#DCE4EE", "#DCE4EE"], - "text_color_disabled": ["gray74", "gray60"] - }, - "CTkComboBox": { - "corner_radius": 6, - "border_width": 2, - "fg_color": ["#F9F9FA", "#343638"], - "border_color": ["#979DA2", "#565B5E"], - "button_color": ["#979DA2", "#565B5E"], - "button_hover_color": ["#6E7174", "#7A848D"], - "text_color": ["gray14", "gray84"], - "text_color_disabled": ["gray50", "gray45"] - }, - "CTkScrollbar": { - "corner_radius": 1000, - "border_spacing": 4, - "fg_color": "transparent", - "button_color": ["gray55", "gray41"], - "button_hover_color": ["gray40", "gray53"] - }, - "CTkSegmentedButton": { - "corner_radius": 6, - "border_width": 2, - "fg_color": ["#979DA2", "gray29"], - "selected_color": ["#3a7ebf", "#1f538d"], - "selected_hover_color": ["#325882", "#14375e"], - "unselected_color": ["#979DA2", "gray29"], - "unselected_hover_color": ["gray70", "gray41"], - "text_color": ["#DCE4EE", "#DCE4EE"], - "text_color_disabled": ["gray74", "gray60"] - }, - "CTkTextbox": { - "corner_radius": 6, - "border_width": 0, - "fg_color": ["gray100", "gray20"], - "border_color": ["#979DA2", "#565B5E"], - "text_color": ["gray14", "gray84"], - "scrollbar_button_color": ["gray55", "gray41"], - "scrollbar_button_hover_color": ["gray40", "gray53"] - }, - "CTkScrollableFrame": { - "label_fg_color": ["gray80", "gray21"] - }, - "DropdownMenu": { - "fg_color": ["gray90", "gray20"], - "hover_color": ["gray75", "gray28"], - "text_color": ["gray14", "gray84"] - }, - "CTkFont": { - "macOS": { - "family": "SF Display", - "size": 13, - "weight": "normal" - }, - "Windows": { - "family": "Roboto", - "size": 13, - "weight": "normal" - }, - "Linux": { - "family": "Roboto", - "size": 13, - "weight": "normal" + "CTk": { + "fg_color": ["gray95", "gray10"] + }, + "CTkToplevel": { + "fg_color": ["gray95", "gray10"] + }, + "CTkFrame": { + "corner_radius": 6, + "border_width": 0, + "fg_color": ["gray90", "gray13"], + "top_fg_color": ["gray85", "gray16"], + "border_color": ["gray65", "gray28"] + }, + "CTkButton": { + "corner_radius": 6, + "border_width": 0, + "fg_color": ["#3a7ebf", "#1f538d"], + "hover_color": ["#325882", "#14375e"], + "border_color": ["#3E454A", "#949A9F"], + "text_color": ["#DCE4EE", "#DCE4EE"], + "text_color_disabled": ["gray74", "gray60"] + }, + "CTkLabel": { + "corner_radius": 0, + "fg_color": "transparent", + "text_color": ["gray14", "gray84"] + }, + "CTkEntry": { + "corner_radius": 6, + "border_width": 2, + "fg_color": ["#F9F9FA", "#343638"], + "border_color": ["#979DA2", "#565B5E"], + "text_color": ["gray14", "gray84"], + "placeholder_text_color": ["gray52", "gray62"] + }, + "CTkCheckbox": { + "corner_radius": 6, + "border_width": 3, + "fg_color": ["#3a7ebf", "#1f538d"], + "border_color": ["#3E454A", "#949A9F"], + "hover_color": ["#325882", "#14375e"], + "checkmark_color": ["#DCE4EE", "gray90"], + "text_color": ["gray14", "gray84"], + "text_color_disabled": ["gray60", "gray45"] + }, + "CTkSwitch": { + "corner_radius": 1000, + "border_width": 3, + "button_length": 0, + "fg_color": ["#939BA2", "#4A4D50"], + "progress_color": ["#3a7ebf", "#1f538d"], + "button_color": ["gray36", "#D5D9DE"], + "button_hover_color": ["gray20", "gray100"], + "text_color": ["gray14", "gray84"], + "text_color_disabled": ["gray60", "gray45"] + }, + "CTkRadiobutton": { + "corner_radius": 1000, + "border_width_checked": 6, + "border_width_unchecked": 3, + "fg_color": ["#3a7ebf", "#1f538d"], + "border_color": ["#3E454A", "#949A9F"], + "hover_color": ["#325882", "#14375e"], + "text_color": ["gray14", "gray84"], + "text_color_disabled": ["gray60", "gray45"] + }, + "CTkProgressBar": { + "corner_radius": 1000, + "border_width": 0, + "fg_color": ["#939BA2", "#4A4D50"], + "progress_color": ["#3a7ebf", "#1f538d"], + "border_color": ["gray", "gray"] + }, + "CTkSlider": { + "corner_radius": 1000, + "button_corner_radius": 1000, + "border_width": 6, + "button_length": 0, + "fg_color": ["#939BA2", "#4A4D50"], + "progress_color": ["gray40", "#AAB0B5"], + "button_color": ["#3a7ebf", "#1f538d"], + "button_hover_color": ["#325882", "#14375e"] + }, + "CTkOptionMenu": { + "corner_radius": 6, + "fg_color": ["#3a7ebf", "#1f538d"], + "button_color": ["#325882", "#14375e"], + "button_hover_color": ["#234567", "#1e2c40"], + "text_color": ["#DCE4EE", "#DCE4EE"], + "text_color_disabled": ["gray74", "gray60"] + }, + "CTkComboBox": { + "corner_radius": 6, + "border_width": 2, + "fg_color": ["#F9F9FA", "#343638"], + "border_color": ["#979DA2", "#565B5E"], + "button_color": ["#979DA2", "#565B5E"], + "button_hover_color": ["#6E7174", "#7A848D"], + "text_color": ["gray14", "gray84"], + "text_color_disabled": ["gray50", "gray45"] + }, + "CTkScrollbar": { + "corner_radius": 1000, + "border_spacing": 4, + "fg_color": "transparent", + "button_color": ["gray55", "gray41"], + "button_hover_color": ["gray40", "gray53"] + }, + "CTkSegmentedButton": { + "corner_radius": 6, + "border_width": 2, + "fg_color": ["#979DA2", "gray29"], + "selected_color": ["#3a7ebf", "#1f538d"], + "selected_hover_color": ["#325882", "#14375e"], + "unselected_color": ["#979DA2", "gray29"], + "unselected_hover_color": ["gray70", "gray41"], + "text_color": ["#DCE4EE", "#DCE4EE"], + "text_color_disabled": ["gray74", "gray60"] + }, + "CTkTextbox": { + "corner_radius": 6, + "border_width": 0, + "fg_color": ["gray100", "gray20"], + "border_color": ["#979DA2", "#565B5E"], + "text_color": ["gray14", "gray84"], + "scrollbar_button_color": ["gray55", "gray41"], + "scrollbar_button_hover_color": ["gray40", "gray53"] + }, + "CTkScrollableFrame": { + "label_fg_color": ["gray80", "gray21"] + }, + "DropdownMenu": { + "fg_color": ["gray90", "gray20"], + "hover_color": ["gray75", "gray28"], + "text_color": ["gray14", "gray84"] + }, + "CTkFont": { + "macOS": { + "family": "SF Display", + "size": 13, + "weight": "normal" + }, + "Windows": { + "family": "Roboto", + "size": 13, + "weight": "normal" + }, + "Linux": { + "family": "Roboto", + "size": 13, + "weight": "normal" + } } - } } diff --git a/ms2rescore/plotting.py b/ms2rescore/plotting.py index abaff004..242668a4 100644 --- a/ms2rescore/plotting.py +++ b/ms2rescore/plotting.py @@ -1,4 +1,4 @@ -"""Plot MS²ReScore results.""" +"""Plot MS²Rescore results.""" import logging import os diff --git a/ms2rescore/rescoring_engines/mokapot.py b/ms2rescore/rescoring_engines/mokapot.py new file mode 100644 index 00000000..64655851 --- /dev/null +++ b/ms2rescore/rescoring_engines/mokapot.py @@ -0,0 +1,137 @@ +"""Mokapot integration for MS²Rescore.""" + +import logging +from typing import Any, Dict, List, Optional + +import numpy as np +import pandas as pd +import psm_utils +from mokapot.brew import brew +from mokapot.dataset import LinearPsmDataset +from pyteomics.mass import nist_mass + +logger = logging.getLogger(__name__) + + +def rescore( + psm_list: psm_utils.PSMList, + mokapot_kwargs: Optional[Dict[str, Any]] = None, +): + """ + Rescore PSMs with Mokapot. + + Parameters + ---------- + psm_list + PSMs to be rescored. + mokapot_kwargs + Additional keyword arguments for Mokapot. Defaults to ``None``. + + """ + # Convert PSMList to Mokapot dataset + feature_names = psm_list[0].rescoring_features.keys() + lin_psm_data = convert_psm_list(psm_list, feature_names) + + # Rescore + confidence_results, model = brew(lin_psm_data, **mokapot_kwargs) + + # Reshape confidence estimates to match PSMList + mokapot_values_targets = ( + confidence_results.confidence_estimates["psms"] + .set_index("index") + .sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]] + ) + mokapot_values_decoys = ( + confidence_results.decoy_confidence_estimates["psms"] + .set_index("index") + .sort_index()[["mokapot score", "mokapot q-value", "mokapot PEP"]] + ) + q = np.full((len(psm_list), 3), np.nan) + q[mokapot_values_targets.index] = mokapot_values_targets.values + q[mokapot_values_decoys.index] = mokapot_values_decoys.values + + # Add Mokapot results to PSMList + psm_list["score"] = q[:, 0] + psm_list["qvalue"] = q[:, 1] + psm_list["pep"] = q[:, 2] + + +def convert_psm_list( + psm_list: psm_utils.PSMList, + feature_names: List[str], + keep_lower_rank_psms: bool = False, +) -> LinearPsmDataset: + """ + Convert a PSM list to a Mokapot dataset. + + Parameters + ---------- + psm_list + PSMList to rescore. + feature_names + List of feature names to use. Items must be keys in the PSM `rescoring_features` dict. + keep_lower_rank_psms + If ``True``, keep all PSMs with rank <= 2. Defaults to ``False``. + + Returns + ------- + mokapot.dataset.LinearPsmDataset + + """ + if None in psm_list["rank"]: + psm_list.set_ranks() + + psm_df = psm_list.to_dataframe() + psm_df = psm_df.reset_index(drop=True).reset_index() + if not keep_lower_rank_psms: + psm_df = psm_df[psm_df["rank"] == 1] + + psm_df["peptide"] = ( + psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True) + ) + psm_df["is_target"] = ~psm_df["is_decoy"] + psm_df["charge"] = psm_df["peptidoform"].apply(lambda x: x.precursor_charge) + psm_df["calcmass"] = psm_df["peptidoform"].apply(lambda x: x.theoretical_mass) + psm_df["expmass"] = _mz_to_mass(psm_df["precursor_mz"], psm_df["charge"]) + psm_df = pd.concat( + [ + psm_df.drop(columns=["rescoring_features"]), + pd.DataFrame(list(psm_df["rescoring_features"])).fillna(0.0), + ], + axis=1, + ) + + required_columns = [ + "index", + "peptide", + "is_target", + "protein_list", + "run", + "calcmass", + "expmass", + "retention_time", + "charge", + ] + required_columns.extend(feature_names) + + lin_psm_data = LinearPsmDataset( + psms=psm_df[required_columns], + target_column="is_target", + spectrum_columns="index", # Use artificial index to allow multi-rank rescoring + peptide_column="peptide", + protein_column="protein_list", + feature_columns=feature_names, + filename_column="run", + scan_column="index", # Keep as spectrum_id? + calcmass_column="calcmass", + expmass_column="expmass", + rt_column="retention_time", + charge_column="charge", + ) + + return lin_psm_data + + +def _mz_to_mass(mz: float, charge: int) -> float: + """Convert m/z to mass.""" + return mz * charge - charge * nist_mass["H"][1][0] diff --git a/ms2rescore/rescoring_engines/percolator.py b/ms2rescore/rescoring_engines/percolator.py index e122eb81..790ad4e2 100644 --- a/ms2rescore/rescoring_engines/percolator.py +++ b/ms2rescore/rescoring_engines/percolator.py @@ -116,7 +116,7 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str): - """Write PIN file for rescoring""" + """Write PIN file for rescoring.""" logger.debug(f"Writing PIN file to {filepath}") psm_utils.io.write_file( psm_list, diff --git a/pyproject.toml b/pyproject.toml index 35756cfd..9663a8c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ dependencies = [ "tomlkit", "psm_utils>=0.3", "customtkinter>=5,<6", + "mokapot>=0.9", ] [project.optional-dependencies]