Skip to content

Commit 1fcf236

Browse files
authored
Merge pull request #131 from wilhelm-lab/feature/add_new_iontypes
Add utilities required by Oktoberfest
2 parents 64aae2d + 700da23 commit 1fcf236

File tree

4 files changed

+133
-48
lines changed

4 files changed

+133
-48
lines changed

spectrum_fundamentals/annotation/annotation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def generate_annotation_matrix(
322322
exp_mass_col = matched_peaks.columns.get_loc("exp_mass")
323323

324324
for peak in matched_peaks.values:
325-
ion_type_index = ion_types.index(peak[ion_type][0])
325+
ion_type_index = ion_types.index(peak[ion_type].split("-", 1)[0])
326326
peak_pos = ((peak[no_col] - 1) * charge_const * len(ion_types)) + (peak[charge_col] - 1) + 3 * ion_type_index
327327

328328
if peak_pos >= constants.VEC_LENGTH:

spectrum_fundamentals/constants.py

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -382,28 +382,17 @@
382382
"[UNIMOD:35]": "[Oxidation (O)]",
383383
}
384384

385-
FRAGMENTATION_ENCODING = {"HCD": 2, "CID": 1}
386-
387-
############################
388-
# GENERATION OF ANNOTATION #
389-
############################
390-
391-
IONS = ["y", "b"] # limited to single character unicode string when array is created
392-
CHARGES = [1, 2, 3] # limited to uint8 (0-255) when array is created
393-
POSITIONS = [x for x in range(1, 30)] # fragment numbers 1-29 -- limited to uint8 (0-255) when array is created
394-
395-
ANNOTATION_FRAGMENT_TYPE = []
396-
ANNOTATION_FRAGMENT_CHARGE = []
397-
ANNOTATION_FRAGMENT_NUMBER = []
398-
for pos in POSITIONS:
399-
for ion in IONS:
400-
for charge in CHARGES:
401-
ANNOTATION_FRAGMENT_TYPE.append(ion)
402-
ANNOTATION_FRAGMENT_CHARGE.append(charge)
403-
ANNOTATION_FRAGMENT_NUMBER.append(pos)
404-
405-
ANNOTATION = [ANNOTATION_FRAGMENT_TYPE, ANNOTATION_FRAGMENT_CHARGE, ANNOTATION_FRAGMENT_NUMBER]
406-
385+
# Integer code assigned to each supported fragmentation method.
# Codes are consecutive and start at 1, following the order of the names below.
FRAGMENTATION_ENCODING = {
    method: code
    for code, method in enumerate(
        ("CID", "HCD", "ETD", "ETHCD", "ETCID", "UVPD", "EID", "ECD", "AIECD"),
        start=1,
    )
}
407396

408397
########################
409398
# RESCORING PARAMETERS #
@@ -460,3 +449,22 @@ class RescoreType(Enum):
460449
"z": ATOM_MASSES["O"] - ATOM_MASSES["N"] - ATOM_MASSES["H"],
461450
"z_r": ATOM_MASSES["O"] - ATOM_MASSES["N"],
462451
}
452+
453+
############################
454+
# GENERATION OF ANNOTATION #
455+
############################
456+
457+
# Fragment charge states; limited to uint8 (0-255) when array is created.
CHARGES = [1, 2, 3]
# Fragment numbers 1-29; limited to uint8 (0-255) when array is created.
POSITIONS = list(range(1, 30))

# Three parallel columns describing every (position, ion type, charge) combination.
# Position varies slowest and charge fastest; ion types are the HCD ions by direction.
ANNOTATION_FRAGMENT_TYPE, ANNOTATION_FRAGMENT_CHARGE, ANNOTATION_FRAGMENT_NUMBER = [], [], []
for pos in POSITIONS:
    for ion in FRAGMENTATION_TO_IONS_BY_DIRECTION["HCD"]:
        for charge in CHARGES:
            ANNOTATION_FRAGMENT_TYPE += [ion]
            ANNOTATION_FRAGMENT_CHARGE += [charge]
            ANNOTATION_FRAGMENT_NUMBER += [pos]

# Row order: fragment type, fragment charge, fragment number.
ANNOTATION = [ANNOTATION_FRAGMENT_TYPE, ANNOTATION_FRAGMENT_CHARGE, ANNOTATION_FRAGMENT_NUMBER]

spectrum_fundamentals/fragments.py

Lines changed: 69 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
1+
import itertools
12
import logging
23
import re
34
from operator import itemgetter
4-
from typing import Dict, List, Optional, Tuple
5+
from typing import Dict, List, Literal, Optional, Tuple, Union
56

67
import numpy as np
78
import pandas as pd
89

9-
from .constants import (
10-
AA_MASSES,
11-
ATOM_MASSES,
12-
FRAGMENTATION_TO_IONS_BY_DIRECTION,
13-
FRAGMENTATION_TO_IONS_BY_PAIRS,
14-
ION_DELTAS,
15-
MOD_MASSES,
16-
PARTICLE_MASSES,
17-
)
10+
import spectrum_fundamentals.constants as c
11+
1812
from .mod_string import internal_without_mods
1913

2014
logger = logging.getLogger(__name__)
@@ -46,7 +40,7 @@ def _get_modifications(peptide_sequence: str, custom_mods: Optional[Dict[str, fl
4640
pattern = re.compile(r"\[.{8}[^\]]*\]")
4741
matches = pattern.finditer(peptide_sequence)
4842

49-
mod_masses = MOD_MASSES | (custom_mods or {})
43+
mod_masses = c.MOD_MASSES | (custom_mods or {})
5044

5145
for match in matches:
5246
start_pos, end_pos = match.span()
@@ -64,14 +58,14 @@ def compute_peptide_mass(sequence: str, custom_mods: Optional[Dict[str, float]]
6458
:param custom_mods: Custom Modifications with the identifier, the unimod equivalent and the respective mass
6559
:return: Theoretical mass of the sequence
6660
"""
67-
terminal_masses = 2 * ATOM_MASSES["H"] + ATOM_MASSES["O"] # add terminal masses HO- and H-
61+
terminal_masses = 2 * c.ATOM_MASSES["H"] + c.ATOM_MASSES["O"] # add terminal masses HO- and H-
6862

6963
modification_deltas = _get_modifications(sequence, custom_mods=custom_mods)
7064
if modification_deltas: # there were modifications
7165
sequence = internal_without_mods([sequence])[0]
7266
terminal_masses += modification_deltas.get(-2, 0.0) # prime with n_term_mod delta if present
7367

74-
peptide_sum = sum([AA_MASSES[c] + modification_deltas.get(i, 0.0) for i, c in enumerate(sequence)])
68+
peptide_sum = sum([c.AA_MASSES[aa] + modification_deltas.get(i, 0.0) for i, aa in enumerate(sequence)])
7569

7670
return terminal_masses + peptide_sum
7771

@@ -98,12 +92,12 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]:
9892
9993
Given the fragmentation method the function returns all ion types that can result from it.
10094
101-
: param fragmentation_method: fragmentation method used during the MS
102-
: raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
103-
: return: list of possible ion types
95+
:param fragmentation_method: fragmentation method used during the MS
96+
:raises ValueError: if fragmentation_method is not supported
97+
:return: list of possible ion types
10498
"""
10599
fragmentation_method = fragmentation_method.upper()
106-
ions = FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, [])
100+
ions = c.FRAGMENTATION_TO_IONS_BY_PAIRS.get(fragmentation_method, [])
107101
if not ions:
108102
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
109103
return ions
@@ -115,12 +109,12 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis
115109
116110
Given the fragmentation method the function returns all ion types that can result from it.
117111
118-
: param fragmentation_method: fragmentation method used during the MS
119-
: raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
120-
: return: list of possible ion types
112+
:param fragmentation_method: fragmentation method used during the MS
113+
:raises ValueError: if fragmentation_method is not supported
114+
:return: list of possible ion types
121115
"""
122116
fragmentation_method = fragmentation_method.upper()
123-
ions = FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, [])
117+
ions = c.FRAGMENTATION_TO_IONS_BY_DIRECTION.get(fragmentation_method, [])
124118
if not ions:
125119
raise ValueError(f"Unknown fragmentation method provided: {fragmentation_method}")
126120
return ions
@@ -133,7 +127,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray:
133127
:param ion_types: type of ions for which mass should be calculated
134128
:return: numpy array with masses of the ions
135129
"""
136-
return np.array([ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)
130+
return np.array([c.ION_DELTAS[ion_type] for ion_type in ion_types]).reshape(len(ion_types), 1)
137131

138132

139133
def initialize_peaks(
@@ -187,7 +181,7 @@ def initialize_peaks(
187181
# add n_term mass to first aa for easy processing in the following calculation
188182
modification_deltas[0] = modification_deltas.get(0, 0.0) + n_term_delta
189183

190-
mass_arr = np.array([AA_MASSES[_] for _ in sequence])
184+
mass_arr = np.array([c.AA_MASSES[_] for _ in sequence])
191185
for pos, mod_mass in modification_deltas.items():
192186
mass_arr[pos] += mod_mass
193187

@@ -206,7 +200,7 @@ def initialize_peaks(
206200
# calculate for m/z for charges 1, 2, 3
207201
# shape of ion_mzs: (n_ions, n_fragments, max_charge)
208202
charges = np.arange(1, max_charge + 1)
209-
ion_mzs = (sum_array[..., np.newaxis] + charges * PARTICLE_MASSES["PROTON"]) / charges
203+
ion_mzs = (sum_array[..., np.newaxis] + charges * c.PARTICLE_MASSES["PROTON"]) / charges
210204

211205
min_mzs, max_mzs = get_min_max_mass(mass_analyzer, ion_mzs, mass_tolerance, unit_mass_tolerance)
212206

@@ -231,7 +225,7 @@ def initialize_peaks(
231225
fragments_meta_data,
232226
n_term_mod,
233227
sequence,
234-
(peptide_mass + ATOM_MASSES["O"] + 2 * ATOM_MASSES["H"]),
228+
(peptide_mass + c.ATOM_MASSES["O"] + 2 * c.ATOM_MASSES["H"]),
235229
)
236230

237231

@@ -407,3 +401,53 @@ def get_min_max_mass(
407401
else:
408402
raise ValueError(f"Unsupported mass_analyzer: {mass_analyzer}")
409403
return (min_mass, max_mass)
404+
405+
406+
FragmentIonComponent = Literal["ion_type", "position", "charge"]
407+
408+
409+
def generate_fragment_ion_annotations(
    ion_types: List[str], order: Tuple[FragmentIonComponent, FragmentIonComponent, FragmentIonComponent]
) -> List[Tuple[str, int, int]]:
    """Generate full list of fragment ions for permitted ion types and specified order.

    The result is the cartesian product of the given ion types with ``c.POSITIONS`` and ``c.CHARGES``.
    The first component in ``order`` varies slowest and the last one fastest. Independent of ``order``,
    every returned tuple is laid out as ``(ion_type, position, charge)``.

    :param ion_types: List of permitted ion types
    :param order: What fragment ion parameters (ion type, position & charge) to group the annotations by
    :return: List of (ion_type, position, charge) tuples sorted by specified component order
    :raises ValueError: if no ion types or redundant ion types are specified, or if order does not consist
        of exactly the three components "ion_type", "position" and "charge" (each used once)
    """
    if len(set(ion_types)) != len(ion_types):
        raise ValueError("Redundant ion types specified")
    elif len(ion_types) == 0:
        raise ValueError("No ion types specified")
    # The set comparison alone would accept e.g. four entries with one component repeated,
    # which would silently duplicate every annotation, so the length is checked as well.
    if len(order) != 3 or set(order) != {"ion_type", "position", "charge"}:
        raise ValueError("Duplicate component used for ordering fragment ions")

    fragment_ion_components: Dict[str, Union[List[str], List[int]]] = {
        "ion_type": ion_types,
        "position": c.POSITIONS,
        "charge": c.CHARGES,
    }

    # Where each component sits inside a raw combination; resolved once instead of per combination.
    ion_type_idx = order.index("ion_type")
    position_idx = order.index("position")
    charge_idx = order.index("charge")

    raw_annotations = itertools.product(*(fragment_ion_components[component] for component in order))

    # Re-arrange every combination into the fixed (ion_type, position, charge) layout.
    return [
        (
            str(combination[ion_type_idx]),
            int(combination[position_idx]),
            int(combination[charge_idx]),
        )
        for combination in raw_annotations
    ]
444+
445+
446+
def format_fragment_ion_annotation(raw_annotation: Tuple[str, int, int]) -> str:
    """Render a raw fragment ion annotation as a ``<ion_type><position>+<charge>`` string.

    :param raw_annotation: `(ion_type, position, charge)` tuple
    :return: formatted annotation string, e.g. ``("y", 3, 2)`` is rendered as ``"y3+2"``
    """
    # Unpacking enforces that exactly three components are supplied.
    ion_type, position, charge = raw_annotation
    return "{}{}+{}".format(ion_type, position, charge)

tests/unit_tests/test_fragments.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from numpy.testing import assert_almost_equal
66

7+
import spectrum_fundamentals.constants as c
78
import spectrum_fundamentals.fragments as fragments
89

910

@@ -143,3 +144,35 @@ def test_get_ion_types_lower_case(self):
143144
def test_invalid_fragmentation_method(self):
144145
"""Test if error is raised for invalid fragmentation method."""
145146
self.assertRaises(ValueError, fragments.retrieve_ion_types_for_peak_initialization, "XYZ")
147+
148+
149+
class TestFragmentIonAnnotation(unittest.TestCase):
    """Tests for fragment ion annotation generation."""

    def test_generate_fragment_ion_types(self):
        """Test if output ordering is valid."""
        # Maps each component order to a builder of the expected annotations for given ion types.
        # The nesting of the comprehension loops mirrors the order (outermost loop varies slowest).
        expected_by_order = {
            ("position", "ion_type", "charge"): lambda ion_types: [
                (ion_type, pos, charge) for pos in c.POSITIONS for ion_type in ion_types for charge in c.CHARGES
            ],
            ("ion_type", "position", "charge"): lambda ion_types: [
                (ion_type, pos, charge) for ion_type in ion_types for pos in c.POSITIONS for charge in c.CHARGES
            ],
        }

        for ion_types in (["y", "b"], ["b", "y"], ["y", "b", "x", "a"]):
            for order, build_expected in expected_by_order.items():
                expected = build_expected(ion_types)
                with self.subTest(order=order, ion_types=ion_types):
                    generated = fragments.generate_fragment_ion_annotations(ion_types=ion_types, order=order)
                    self.assertEqual(generated, expected)

    def test_catches_redundant_order(self):
        """Check if redundant order is caught."""
        redundant_order = ("ion_type", "position", "ion_type")
        self.assertRaises(
            ValueError,
            fragments.generate_fragment_ion_annotations,
            ion_types=["y", "b"],
            order=redundant_order,
        )

0 commit comments

Comments
 (0)