1+ import itertools
12import logging
23import re
34from operator import itemgetter
4- from typing import Dict , List , Optional , Tuple
5+ from typing import Dict , List , Literal , Optional , Tuple , Union
56
67import numpy as np
78import pandas as pd
89
9- from .constants import (
10- AA_MASSES ,
11- ATOM_MASSES ,
12- FRAGMENTATION_TO_IONS_BY_DIRECTION ,
13- FRAGMENTATION_TO_IONS_BY_PAIRS ,
14- ION_DELTAS ,
15- MOD_MASSES ,
16- PARTICLE_MASSES ,
17- )
10+ import spectrum_fundamentals .constants as c
11+
1812from .mod_string import internal_without_mods
1913
2014logger = logging .getLogger (__name__ )
@@ -46,7 +40,7 @@ def _get_modifications(peptide_sequence: str, custom_mods: Optional[Dict[str, fl
4640 pattern = re .compile (r"\[.{8}[^\]]*\]" )
4741 matches = pattern .finditer (peptide_sequence )
4842
49- mod_masses = MOD_MASSES | (custom_mods or {})
43+ mod_masses = c . MOD_MASSES | (custom_mods or {})
5044
5145 for match in matches :
5246 start_pos , end_pos = match .span ()
@@ -64,14 +58,14 @@ def compute_peptide_mass(sequence: str, custom_mods: Optional[Dict[str, float]]
6458 :param custom_mods: Custom Modifications with the identifier, the unimod equivalent and the respective mass
6559 :return: Theoretical mass of the sequence
6660 """
67- terminal_masses = 2 * ATOM_MASSES ["H" ] + ATOM_MASSES ["O" ] # add terminal masses HO- and H-
61+ terminal_masses = 2 * c . ATOM_MASSES ["H" ] + c . ATOM_MASSES ["O" ] # add terminal masses HO- and H-
6862
6963 modification_deltas = _get_modifications (sequence , custom_mods = custom_mods )
7064 if modification_deltas : # there were modifications
7165 sequence = internal_without_mods ([sequence ])[0 ]
7266 terminal_masses += modification_deltas .get (- 2 , 0.0 ) # prime with n_term_mod delta if present
7367
74- peptide_sum = sum ([AA_MASSES [c ] + modification_deltas .get (i , 0.0 ) for i , c in enumerate (sequence )])
68+ peptide_sum = sum ([c . AA_MASSES [aa ] + modification_deltas .get (i , 0.0 ) for i , aa in enumerate (sequence )])
7569
7670 return terminal_masses + peptide_sum
7771
@@ -98,12 +92,12 @@ def retrieve_ion_types(fragmentation_method: str) -> List[str]:
9892
9993 Given the fragmentation method the function returns all ion types that can result from it.
10094
101- : param fragmentation_method: fragmentation method used during the MS
102- : raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
103- : return: list of possible ion types
95+ :param fragmentation_method: fragmentation method used during the MS
96+ :raises ValueError: if fragmentation_method is not supported
97+ :return: list of possible ion types
10498 """
10599 fragmentation_method = fragmentation_method .upper ()
106- ions = FRAGMENTATION_TO_IONS_BY_PAIRS .get (fragmentation_method , [])
100+ ions = c . FRAGMENTATION_TO_IONS_BY_PAIRS .get (fragmentation_method , [])
107101 if not ions :
108102 raise ValueError (f"Unknown fragmentation method provided: { fragmentation_method } " )
109103 return ions
@@ -115,12 +109,12 @@ def retrieve_ion_types_for_peak_initialization(fragmentation_method: str) -> Lis
115109
116110 Given the fragmentation method the function returns all ion types that can result from it.
117111
118- : param fragmentation_method: fragmentation method used during the MS
119- : raises ValueError: if fragmentation_method is other than one of HCD, CID, ETD, ECD, ETCID, ETHCD, UVPD
120- : return: list of possible ion types
112+ :param fragmentation_method: fragmentation method used during the MS
113+ :raises ValueError: if fragmentation_method is not supported
114+ :return: list of possible ion types
121115 """
122116 fragmentation_method = fragmentation_method .upper ()
123- ions = FRAGMENTATION_TO_IONS_BY_DIRECTION .get (fragmentation_method , [])
117+ ions = c . FRAGMENTATION_TO_IONS_BY_DIRECTION .get (fragmentation_method , [])
124118 if not ions :
125119 raise ValueError (f"Unknown fragmentation method provided: { fragmentation_method } " )
126120 return ions
@@ -133,7 +127,7 @@ def get_ion_delta(ion_types: List[str]) -> np.ndarray:
133127 :param ion_types: type of ions for which mass should be calculated
134128 :return: numpy array with masses of the ions
135129 """
136- return np .array ([ION_DELTAS [ion_type ] for ion_type in ion_types ]).reshape (len (ion_types ), 1 )
130+ return np .array ([c . ION_DELTAS [ion_type ] for ion_type in ion_types ]).reshape (len (ion_types ), 1 )
137131
138132
139133def initialize_peaks (
@@ -187,7 +181,7 @@ def initialize_peaks(
187181 # add n_term mass to first aa for easy processing in the following calculation
188182 modification_deltas [0 ] = modification_deltas .get (0 , 0.0 ) + n_term_delta
189183
190- mass_arr = np .array ([AA_MASSES [_ ] for _ in sequence ])
184+ mass_arr = np .array ([c . AA_MASSES [_ ] for _ in sequence ])
191185 for pos , mod_mass in modification_deltas .items ():
192186 mass_arr [pos ] += mod_mass
193187
@@ -206,7 +200,7 @@ def initialize_peaks(
206200 # calculate for m/z for charges 1, 2, 3
207201 # shape of ion_mzs: (n_ions, n_fragments, max_charge)
208202 charges = np .arange (1 , max_charge + 1 )
209- ion_mzs = (sum_array [..., np .newaxis ] + charges * PARTICLE_MASSES ["PROTON" ]) / charges
203+ ion_mzs = (sum_array [..., np .newaxis ] + charges * c . PARTICLE_MASSES ["PROTON" ]) / charges
210204
211205 min_mzs , max_mzs = get_min_max_mass (mass_analyzer , ion_mzs , mass_tolerance , unit_mass_tolerance )
212206
@@ -231,7 +225,7 @@ def initialize_peaks(
231225 fragments_meta_data ,
232226 n_term_mod ,
233227 sequence ,
234- (peptide_mass + ATOM_MASSES ["O" ] + 2 * ATOM_MASSES ["H" ]),
228+ (peptide_mass + c . ATOM_MASSES ["O" ] + 2 * c . ATOM_MASSES ["H" ]),
235229 )
236230
237231
@@ -407,3 +401,53 @@ def get_min_max_mass(
407401 else :
408402 raise ValueError (f"Unsupported mass_analyzer: { mass_analyzer } " )
409403 return (min_mass , max_mass )
404+
405+
# The three components that together identify a fragment ion annotation.
FragmentIonComponent = Literal["ion_type", "position", "charge"]


def generate_fragment_ion_annotations(
    ion_types: List[str], order: Tuple[FragmentIonComponent, FragmentIonComponent, FragmentIonComponent]
) -> List[Tuple[str, int, int]]:
    """Generate full list of fragment ions for permitted ion types and specified order.

    The annotations are the cartesian product of ``ion_types``, ``c.POSITIONS`` and ``c.CHARGES``.
    The first component named in ``order`` varies slowest and the last one varies fastest. Every
    returned tuple is laid out as ``(ion_type, position, charge)`` regardless of ``order``.

    :param ion_types: List of permitted ion types; must be non-empty and free of duplicates
    :param order: What fragment ion parameters (ion type, position & charge) to group the annotations by
    :return: List of (ion_type, position, charge) tuples sorted by specified component order
    :raises ValueError: if ion_types is empty or contains duplicates, or if order does not contain each of
        "ion_type", "position" and "charge" exactly once
    """
    # Validate all arguments up front, before any constants are read.
    if len(set(ion_types)) != len(ion_types):
        raise ValueError("Redundant ion types specified")
    if len(ion_types) == 0:
        raise ValueError("No ion types specified")
    # A plain set comparison would accept an over-long order with a repeated key, so duplicates
    # are rejected explicitly before checking that exactly the three known keys are present.
    if len(set(order)) != len(order):
        raise ValueError("Duplicate component used for ordering fragment ions")
    if set(order) != {"ion_type", "position", "charge"}:
        raise ValueError("Order must contain each of 'ion_type', 'position' and 'charge' exactly once")

    # Everything is stringified so the mapping is homogeneous; positions and charges are cast
    # back to int when the final tuples are assembled.
    fragment_ion_components: Dict[str, List[str]] = {
        "ion_type": ion_types,
        "position": [str(pos) for pos in c.POSITIONS],
        "charge": [str(charge) for charge in c.CHARGES],
    }

    # Where each component sits inside a raw combination; computed once instead of per combination.
    ion_type_idx = order.index("ion_type")
    position_idx = order.index("position")
    charge_idx = order.index("charge")

    # itertools.product advances the rightmost iterable fastest, which yields the requested grouping.
    raw_annotations = itertools.product(*(fragment_ion_components[component] for component in order))

    return [
        (
            str(combination[ion_type_idx]),
            int(combination[position_idx]),
            int(combination[charge_idx]),
        )
        for combination in raw_annotations
    ]
444+
445+
def format_fragment_ion_annotation(raw_annotation: Tuple[str, int, int]) -> str:
    """Transform (ion_type, position, charge) tuple into <ion_type><position>+<charge> string.

    :param raw_annotation: `(ion_type, position, charge)` tuple
    :return: formatted annotation string, e.g. ``("b", 3, 2)`` becomes ``"b3+2"``
    """
    # Unpacking enforces that exactly three components are supplied.
    ion_type, pos, charge = raw_annotation
    return f"{ion_type}{pos}+{charge}"
0 commit comments