@@ -17,7 +17,7 @@ def match_peaks(
1717 tmt_n_term : int ,
1818 unmod_sequence : str ,
1919 charge : int ,
20- ) -> List [Dict [str , Union [str , int , float ]]]:
20+ ) -> Tuple [ List [Dict [str , Union [str , int , float ]]], int ]:
2121 """
2222 Matching experimental peaks with theoretical fragment ions.
2323
@@ -36,6 +36,7 @@ def match_peaks(
3636 temp_list = []
3737 next_start_peak = 0
3838 matched_peak = False
39+ count_annotated_nl = 0
3940 fragment_no : float
4041 for fragment in fragments_meta_data :
4142 min_mass = fragment ["min_mass" ]
@@ -56,27 +57,32 @@ def match_peaks(
5657 if (
5758 not (fragment ["ion_type" ][0 ] == "b" and fragment_no == 1 )
5859 or (unmod_sequence [0 ] == "R" or unmod_sequence [0 ] == "H" or unmod_sequence [0 ] == "K" )
59- and (tmt_n_term == 1 )
60+ or (tmt_n_term == 2 )
6061 ):
61- row_list .append (
62- {
63- "ion_type" : fragment ["ion_type" ],
64- "no" : fragment_no ,
65- "charge" : fragment ["charge" ],
66- "exp_mass" : peak_mass ,
67- "theoretical_mass" : fragment ["mass" ],
68- "intensity" : peak_intensity ,
69- }
70- )
71- if peak_intensity > max_intensity :
72- max_intensity = float (peak_intensity )
62+ # For now, neutral loss peaks are only counted (not added to the annotated rows); this can change with different models later
63+ if fragment ["neutral_loss" ] == "" :
64+ row_list .append (
65+ {
66+ "ion_type" : fragment ["ion_type" ],
67+ "no" : fragment_no ,
68+ "charge" : fragment ["charge" ],
69+ "exp_mass" : peak_mass ,
70+ "theoretical_mass" : fragment ["mass" ],
71+ "intensity" : peak_intensity ,
72+ }
73+ )
74+ if peak_intensity > max_intensity :
75+ max_intensity = float (peak_intensity )
76+ else :
77+ count_annotated_nl += 1
78+
7379 matched_peak = True
7480 next_start_peak = start_peak
7581 start_peak += 1
7682 for row in row_list :
7783 row ["intensity" ] = float (row ["intensity" ]) / max_intensity
7884 temp_list .append (row )
79- return temp_list
85+ return temp_list , count_annotated_nl
8086
8187
8288def handle_multiple_matches (
@@ -123,6 +129,7 @@ def annotate_spectra(
123129 unit_mass_tolerance : Optional [str ] = None ,
124130 custom_mods : Optional [Dict [str , float ]] = None ,
125131 fragmentation_method : str = "HCD" ,
132+ annotate_neutral_loss : Optional [bool ] = False ,
126133) -> pd .DataFrame :
127134 """
128135 Annotate a set of spectra.
@@ -143,6 +150,7 @@ def annotate_spectra(
143150 :param unit_mass_tolerance: unit for the mass tolerance (da or ppm)
144151 :param fragmentation_method: fragmentation method that was used
145152 :param custom_mods: mapping of custom UNIMOD string identifiers ('[UNIMOD:xyz]') to their mass
153+ :param annotate_neutral_loss: flag to indicate whether to annotate neutral losses or not
146154 :return: a Pandas DataFrame containing the annotated spectra with meta data
147155 """
148156 raw_file_annotations = []
@@ -155,14 +163,22 @@ def annotate_spectra(
155163 unit_mass_tolerance ,
156164 fragmentation_method = fragmentation_method ,
157165 custom_mods = custom_mods ,
166+ annotate_neutral_losses = annotate_neutral_loss ,
158167 )
159168 if not results :
160169 continue
161170 raw_file_annotations .append (results )
162171 results_df = pd .DataFrame (raw_file_annotations )
163172
164173 if "CROSSLINKER_TYPE" not in index_columns :
165- results_df .columns = ["INTENSITIES" , "MZ" , "CALCULATED_MASS" , "removed_peaks" ]
174+ results_df .columns = [
175+ "INTENSITIES" ,
176+ "MZ" ,
177+ "CALCULATED_MASS" ,
178+ "removed_peaks" ,
179+ "ANNOTATED_NL_COUNT" ,
180+ "EXPECTED_NL_COUNT" ,
181+ ]
166182 else :
167183 results_df .columns = [
168184 "INTENSITIES_A" ,
@@ -347,9 +363,10 @@ def parallel_annotate(
347363 unit_mass_tolerance : Optional [str ] = None ,
348364 custom_mods : Optional [Dict [str , float ]] = None ,
349365 fragmentation_method : str = "HCD" ,
366+ annotate_neutral_losses : Optional [bool ] = False ,
350367) -> Optional [
351368 Union [
352- Tuple [np .ndarray , np .ndarray , float , int ],
369+ Tuple [np .ndarray , np .ndarray , float , int , int , int ],
353370 Tuple [np .ndarray , np .ndarray , np .ndarray , np .ndarray , float , float , int , int ],
354371 ]
355372]:
@@ -369,6 +386,7 @@ def parallel_annotate(
369386 :param unit_mass_tolerance: unit for the mass tolerance (da or ppm)
370387 :param custom_mods: mapping of custom UNIMOD string identifiers ('[UNIMOD:xyz]') to their mass
371388 :param fragmentation_method: fragmentation method that was used
389+ :param annotate_neutral_losses: flag to indicate whether to annotate neutral losses or not
372390 :return: a tuple containing intensity values (np.ndarray), masses (np.ndarray), calculated mass (float),
373391 removed peaks (int) and, for linear peptides, the annotated and expected neutral loss counts (int, int)
374392 """
@@ -383,6 +401,7 @@ def parallel_annotate(
383401 unit_mass_tolerance ,
384402 fragmentation_method = fragmentation_method ,
385403 custom_mods = custom_mods ,
404+ add_neutral_losses = annotate_neutral_losses ,
386405 )
387406
388407 if (spectrum [index_columns ["PEPTIDE_LENGTH_A" ]] > 30 ) or (spectrum [index_columns ["PEPTIDE_LENGTH_B" ]] > 30 ):
@@ -399,6 +418,7 @@ def _annotate_linear_spectrum(
399418 unit_mass_tolerance : Optional [str ],
400419 custom_mods : Optional [Dict [str , float ]] = None ,
401420 fragmentation_method : str = "HCD" ,
421+ add_neutral_losses : Optional [bool ] = False ,
402422):
403423 """
404424 Annotate a linear peptide spectrum.
@@ -409,21 +429,24 @@ def _annotate_linear_spectrum(
409429 :param unit_mass_tolerance: Unit for the mass tolerance (da or ppm)
410430 :param custom_mods: mapping of custom UNIMOD string identifiers ('[UNIMOD:xyz]') to their mass
411431 :param fragmentation_method: fragmentation method that was used
432+ :param add_neutral_losses: flag to indicate whether to annotate neutral losses or not
412433 :return: Annotated spectrum
413434 """
414435 mod_seq_column = "MODIFIED_SEQUENCE"
415436 if "MODIFIED_SEQUENCE_MSA" in index_columns :
416437 mod_seq_column = "MODIFIED_SEQUENCE_MSA"
417- fragments_meta_data , tmt_n_term , unmod_sequence , calc_mass = initialize_peaks (
438+
439+ fragments_meta_data , tmt_n_term , unmod_sequence , calc_mass , expected_nl = initialize_peaks (
418440 sequence = spectrum [index_columns [mod_seq_column ]],
419441 mass_analyzer = spectrum [index_columns ["MASS_ANALYZER" ]],
420442 charge = spectrum [index_columns ["PRECURSOR_CHARGE" ]],
421443 mass_tolerance = mass_tolerance ,
422444 unit_mass_tolerance = unit_mass_tolerance ,
423445 fragmentation_method = fragmentation_method ,
424446 custom_mods = custom_mods ,
447+ add_neutral_losses = add_neutral_losses ,
425448 )
426- matched_peaks = match_peaks (
449+ matched_peaks , count_annotated_nl = match_peaks (
427450 fragments_meta_data ,
428451 spectrum [index_columns ["INTENSITIES" ]],
429452 spectrum [index_columns ["MZ" ]],
@@ -439,13 +462,13 @@ def _annotate_linear_spectrum(
439462 if len (matched_peaks ) == 0 :
440463 intensity = np .full (vec_length , 0.0 )
441464 mass = np .full (vec_length , 0.0 )
442- return intensity , mass , calc_mass , 0
465+ return intensity , mass , calc_mass , 0 , 0 , 0
443466
444467 matched_peaks , removed_peaks = handle_multiple_matches (matched_peaks )
445468 intensities , mass = generate_annotation_matrix (
446469 matched_peaks , unmod_sequence , spectrum [index_columns ["PRECURSOR_CHARGE" ]], fragmentation_method
447470 )
448- return intensities , mass , calc_mass , removed_peaks
471+ return intensities , mass , calc_mass , removed_peaks , count_annotated_nl , expected_nl
449472
450473
451474def _annotate_crosslinked_spectrum (
@@ -493,7 +516,7 @@ def _xl_annotation_workflow(seq_id: str, non_cl_xl: bool):
493516 array_size = 348
494517 inputs .append (custom_mods )
495518 fragments_meta_data , tmt_n_term , unmod_sequence , calc_mass = initialize_peaks_xl (* inputs )
496- matched_peaks = match_peaks (
519+ matched_peaks , annotated_nl = match_peaks (
497520 fragments_meta_data ,
498521 np .array (spectrum [index_columns ["INTENSITIES" ]]),
499522 np .array (spectrum [index_columns ["MZ" ]]), # Convert to numpy array
0 commit comments