Skip to content

Fix Fragpipe proteoform parsing #124

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 19 additions & 16 deletions psm_utils/io/fragpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from abc import ABC
from pathlib import Path
from typing import Iterable, Optional
from pyteomics.proforma import MassModification, to_proforma

from psm_utils.io._base_classes import ReaderBase
from psm_utils.io._utils import set_csv_field_size_limit
Expand Down Expand Up @@ -76,7 +77,7 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:

return PSM(
peptidoform=self._parse_peptidoform(
psm_dict["Modified Peptide"], psm_dict["Peptide"], psm_dict["Charge"]
psm_dict["Peptide"], psm_dict["Assigned Modifications"], psm_dict["Charge"]
),
spectrum_id=self._parse_spectrum_id(psm_dict["Spectrum"]),
run=self._parse_run(psm_dict["Spectrum File"]),
Expand All @@ -98,22 +99,24 @@ def _get_peptide_spectrum_match(self, psm_dict) -> PSM:
)

@staticmethod
def _parse_peptidoform(peptide: str, modifications: str, charge: Optional[str]) -> str:
    """
    Build a ProForma peptidoform string from FragPipe PSM columns.

    Parameters
    ----------
    peptide
        Unmodified amino acid sequence (``Peptide`` column).
    modifications
        Comma-separated modification entries (``Assigned Modifications`` column). Each
        entry is ``<position><residue>(<mass>)``, ``N-term(<mass>)`` or
        ``C-term(<mass>)``, e.g. ``"3M(15.994915), C-term(17.034480)"``. Positions are
        1-based. An empty string or ``None`` means no modifications.
    charge
        Precursor charge (``Charge`` column). Strings such as ``"2"`` or ``"2.0"`` and
        plain numbers are accepted; an empty or missing value yields a peptidoform
        without charge suffix.

    Returns
    -------
    str
        ProForma string with mass-shift modifications, e.g.
        ``"LHM[+15.9949]TNQNMEK-[+17.0345]/3"``.

    Raises
    ------
    ValueError
        If a modification entry is malformed, or if its position/residue does not
        match ``peptide``.

    """
    sequence = [(aa, []) for aa in peptide]
    n_term, c_term = [], []

    for mod_entry in (modifications or "").split(","):
        mod_entry = mod_entry.strip()
        if not mod_entry:
            continue

        # Split "<site>(<mass>)" into its two parts.
        site, opening, mass_text = mod_entry.partition("(")
        if not site or not opening or not mass_text.endswith(")"):
            raise ValueError(f"Cannot parse FragPipe modification entry {mod_entry!r}.")
        try:
            mass = float(mass_text[:-1])
        except ValueError as e:
            raise ValueError(
                f"Invalid mass in FragPipe modification entry {mod_entry!r}."
            ) from e

        if site == "N-term":
            n_term.append(MassModification(mass))
        elif site == "C-term":
            c_term.append(MassModification(mass))
        else:
            # Residue-level entry: 1-based position followed by the residue letter.
            residue = site[-1]
            try:
                position = int(site[:-1])
            except ValueError as e:
                raise ValueError(
                    f"Invalid site in FragPipe modification entry {mod_entry!r}."
                ) from e
            # Explicit check instead of ``assert``: asserts vanish under ``-O`` and a
            # position of 0 would otherwise silently wrap around to the last residue.
            if not 1 <= position <= len(sequence) or sequence[position - 1][0] != residue:
                raise ValueError(
                    f"Modification entry {mod_entry!r} does not match peptide {peptide!r}."
                )
            sequence[position - 1][1].append(MassModification(mass))

    # Normalise the charge so float-formatted values ("2.0") are written as "/2" and
    # empty/missing values do not produce a charge suffix.
    charge_state = int(float(charge)) if charge else None

    return to_proforma(sequence, n_term=n_term, c_term=c_term, charge_state=charge_state)

@staticmethod
def _parse_spectrum_id(spectrum: str) -> str:
Expand Down
12 changes: 5 additions & 7 deletions tests/test_io/test_fragpipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,14 @@ def test_iter(self):

def test__parse_peptidoform(self):
    """Check conversion of peptide, assigned modifications, and charge to ProForma."""
    # Each case: ((peptide, assigned modifications, charge), expected ProForma string)
    test_cases = [
        (
            ("LHMTNQNMEK", "3M(15.994915), C-term(17.034480)", "3"),
            "LHM[+15.9949]TNQNMEK-[+17.0345]/3",
        ),
        (("ANIAVQR", "N-term(42.0106)", "2"), "[+42.0106]-ANIAVQR/2"),
        (("IPAVTYPK", "", "2"), "IPAVTYPK/2"),
        # Charge given as an integer rather than a string
        (("IPAVTYPK", "", 2), "IPAVTYPK/2"),
    ]

    reader = FragPipeReader("./tests/test_data/test_fragpipe.tsv")
    for (peptide, modifications, charge), expected in test_cases:
        assert reader._parse_peptidoform(peptide, modifications, charge) == expected

def test__parse_spectrum_id(self):
test_cases = [
Expand Down
Loading