Skip to content

Commit 46bd487

Browse files
authored
Merge pull request #190 from wilhelm-lab/fix/xl_noncleavable
Fix/xl noncleavable
2 parents ffb881b + 4e5ea34 commit 46bd487

File tree

6 files changed

+168
-76
lines changed

6 files changed

+168
-76
lines changed

spectrum_fundamentals/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@
132132
"cm": "[UNIMOD:4]",
133133
"dsso": "[UNIMOD:1896]",
134134
"dsbu": "[UNIMOD:1884]",
135+
"dss": "[UNIMOD:1898]",
136+
"bs3": "[UNIMOD:1898]",
135137
}
136138

137139
#######################

spectrum_fundamentals/metrics/fragments_ratio.py

Lines changed: 117 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class FragmentsRatio(Metric):
3333
def count_with_ion_mask(
3434
boolean_array: scipy.sparse.csr_matrix,
3535
ion_mask: Optional[Union[np.ndarray, scipy.sparse.spmatrix]] = None,
36-
xl: bool = False,
36+
cms2: bool = False,
3737
) -> np.ndarray:
3838
"""
3939
Count the number of ions.
@@ -43,10 +43,10 @@ def count_with_ion_mask(
4343
:param ion_mask: mask with 1s for the ions that should be counted and 0s for ions that should be ignored, \
4444
integer array of length 174 for linear and 348 for crosslinked peptides, or a list of integers,
4545
or a scipy.sparse.csr_matrix or scipy.sparse._csc.csc_matrix.
46-
:param xl: whether to process with crosslinked or linear peptides
46+
:param cms2: whether to process with cleavable crosslinked or linear peptides
4747
:return: number of observed/predicted peaks not masked by ion_mask
4848
"""
49-
if xl:
49+
if cms2:
5050
array_size = 348
5151
else:
5252
array_size = 174
@@ -62,7 +62,7 @@ def count_observation_states(
6262
observation_state: scipy.sparse.csr_matrix,
6363
test_state: int,
6464
ion_mask: Optional[Union[np.ndarray, scipy.sparse.csr_matrix]] = None,
65-
xl: bool = False,
65+
cms2: bool = False,
6666
) -> np.ndarray:
6767
"""
6868
Count the number of observation states.
@@ -71,11 +71,11 @@ def count_observation_states(
7171
:param test_state: integer for the test observation state
7272
:param ion_mask: mask with 1s for the ions that should be counted and 0s for ions that should be ignored, \
7373
integer array of length 174
74-
:param xl: whether or not the function is executed with xl mode
74+
:param cms2: whether or not the function is executed with cms2 mode
7575
:return: number of observation states equal to test_state per row
7676
"""
7777
state_boolean = observation_state == test_state
78-
return FragmentsRatio.count_with_ion_mask(state_boolean, ion_mask, xl=xl)
78+
return FragmentsRatio.count_with_ion_mask(state_boolean, ion_mask, cms2=cms2)
7979

8080
@staticmethod
8181
def get_mask_observed_valid(observed_mz: scipy.sparse.csr_matrix) -> scipy.sparse.csr_matrix:
@@ -136,15 +136,19 @@ def get_observation_state(
136136
)
137137
return observation_state
138138

139-
def calc(self, xl: bool = False):
139+
def calc(self, xl: bool = False, cms2: bool = False):
140140
"""Adds columns with count, fraction and fraction_predicted features to metrics_val dataframe."""
141141
if self.true_intensities is None or self.pred_intensities is None:
142142
return None
143143
if xl:
144-
true_intensities_a = self.true_intensities[:, 0:348]
145-
true_intensities_b = self.true_intensities[:, 348:]
146-
pred_intensities_a = self.pred_intensities[:, 0:348]
147-
pred_intensities_b = self.pred_intensities[:, 348:]
144+
if cms2:
145+
max_length = 348
146+
else:
147+
max_length = 174
148+
true_intensities_a = self.true_intensities[:, 0:max_length]
149+
true_intensities_b = self.true_intensities[:, max_length:]
150+
pred_intensities_a = self.pred_intensities[:, 0:max_length]
151+
pred_intensities_b = self.pred_intensities[:, max_length:]
148152
mask_observed_valid_a = FragmentsRatio.get_mask_observed_valid(true_intensities_a)
149153
mask_observed_valid_b = FragmentsRatio.get_mask_observed_valid(true_intensities_b)
150154
observed_boolean_a = FragmentsRatio.make_boolean(true_intensities_a, mask_observed_valid_a)
@@ -157,120 +161,180 @@ def calc(self, xl: bool = False):
157161
observation_state_b = FragmentsRatio.get_observation_state(
158162
observed_boolean_b, predicted_boolean_b, mask_observed_valid_b
159163
)
160-
valid_ions_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, xl=True))
161-
valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, xl=True))
164+
valid_ions_a = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, cms2=cms2))
165+
valid_ions_b = np.maximum(1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, cms2=cms2))
162166
valid_ions_b_a = np.maximum(
163-
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.B_ION_MASK_XL, xl=True)
167+
1,
168+
FragmentsRatio.count_with_ion_mask(
169+
mask_observed_valid_a, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
170+
),
164171
)
165172
valid_ions_b_b = np.maximum(
166-
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.B_ION_MASK_XL, xl=True)
173+
1,
174+
FragmentsRatio.count_with_ion_mask(
175+
mask_observed_valid_b, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
176+
),
167177
)
168178
valid_ions_y_a = np.maximum(
169-
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_a, constants.Y_ION_MASK_XL, xl=True)
179+
1,
180+
FragmentsRatio.count_with_ion_mask(
181+
mask_observed_valid_a, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
182+
),
170183
)
171184
valid_ions_y_b = np.maximum(
172-
1, FragmentsRatio.count_with_ion_mask(mask_observed_valid_b, constants.Y_ION_MASK_XL, xl=True)
185+
1,
186+
FragmentsRatio.count_with_ion_mask(
187+
mask_observed_valid_b, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
188+
),
173189
)
174190
# counting metrics
175-
self.metrics_val["count_predicted_a"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_a, xl=True)
176-
self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_b, xl=True)
191+
self.metrics_val["count_predicted_a"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_a, cms2=cms2)
192+
self.metrics_val["count_predicted_b"] = FragmentsRatio.count_with_ion_mask(predicted_boolean_b, cms2=cms2)
177193
self.metrics_val["count_predicted_b_a"] = FragmentsRatio.count_with_ion_mask(
178-
predicted_boolean_a, constants.B_ION_MASK_XL, xl=True
194+
predicted_boolean_a, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
179195
)
180196
self.metrics_val["count_predicted_b_b"] = FragmentsRatio.count_with_ion_mask(
181-
predicted_boolean_b, constants.B_ION_MASK_XL, xl=True
197+
predicted_boolean_b, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
182198
)
183199
self.metrics_val["count_predicted_y_a"] = FragmentsRatio.count_with_ion_mask(
184-
predicted_boolean_a, constants.Y_ION_MASK_XL, xl=True
200+
predicted_boolean_a, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
185201
)
186202
self.metrics_val["count_predicted_y_b"] = FragmentsRatio.count_with_ion_mask(
187-
predicted_boolean_b, constants.Y_ION_MASK_XL, xl=True
203+
predicted_boolean_b, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
188204
)
189-
self.metrics_val["count_observed_a"] = FragmentsRatio.count_with_ion_mask(observed_boolean_a, xl=True)
190-
self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask(observed_boolean_b, xl=True)
205+
self.metrics_val["count_observed_a"] = FragmentsRatio.count_with_ion_mask(observed_boolean_a, cms2=cms2)
206+
self.metrics_val["count_observed_b"] = FragmentsRatio.count_with_ion_mask(observed_boolean_b, cms2=cms2)
191207
self.metrics_val["count_observed_b_a"] = FragmentsRatio.count_with_ion_mask(
192-
observed_boolean_a, constants.B_ION_MASK_XL, xl=True
208+
observed_boolean_a, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
193209
)
194210
self.metrics_val["count_observed_b_b"] = FragmentsRatio.count_with_ion_mask(
195-
observed_boolean_b, constants.B_ION_MASK_XL, xl=True
211+
observed_boolean_b, constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK, cms2=cms2
196212
)
197213
self.metrics_val["count_observed_y_a"] = FragmentsRatio.count_with_ion_mask(
198-
observed_boolean_a, constants.Y_ION_MASK_XL, xl=True
214+
observed_boolean_a, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
199215
)
200216
self.metrics_val["count_observed_y_b"] = FragmentsRatio.count_with_ion_mask(
201-
observed_boolean_b, constants.Y_ION_MASK_XL, xl=True
217+
observed_boolean_b, constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK, cms2=cms2
202218
)
203219
self.metrics_val["count_observed_and_predicted_a"] = FragmentsRatio.count_observation_states(
204-
observation_state_a, ObservationState.OBS_AND_PRED, xl=True
220+
observation_state_a, ObservationState.OBS_AND_PRED, cms2=cms2
205221
)
206222
self.metrics_val["count_observed_and_predicted_b"] = FragmentsRatio.count_observation_states(
207-
observation_state_b, ObservationState.OBS_AND_PRED, xl=True
223+
observation_state_b, ObservationState.OBS_AND_PRED, cms2=cms2
208224
)
209225
self.metrics_val["count_observed_and_predicted_b_a"] = FragmentsRatio.count_observation_states(
210-
observation_state_a, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl=True
226+
observation_state_a,
227+
ObservationState.OBS_AND_PRED,
228+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
229+
cms2=cms2,
211230
)
212231
self.metrics_val["count_observed_and_predicted_b_b"] = FragmentsRatio.count_observation_states(
213-
observation_state_b, ObservationState.OBS_AND_PRED, constants.B_ION_MASK_XL, xl=True
232+
observation_state_b,
233+
ObservationState.OBS_AND_PRED,
234+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
235+
cms2=cms2,
214236
)
215237
self.metrics_val["count_observed_and_predicted_y_a"] = FragmentsRatio.count_observation_states(
216-
observation_state_a, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl=True
238+
observation_state_a,
239+
ObservationState.OBS_AND_PRED,
240+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
241+
cms2=cms2,
217242
)
218243
self.metrics_val["count_observed_and_predicted_y_b"] = FragmentsRatio.count_observation_states(
219-
observation_state_b, ObservationState.OBS_AND_PRED, constants.Y_ION_MASK_XL, xl=True
244+
observation_state_b,
245+
ObservationState.OBS_AND_PRED,
246+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
247+
cms2=cms2,
220248
)
221249
self.metrics_val["count_not_observed_and_not_predicted_a"] = FragmentsRatio.count_observation_states(
222-
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, xl=True
250+
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, cms2=cms2
223251
)
224252
self.metrics_val["count_not_observed_and_not_predicted_b"] = FragmentsRatio.count_observation_states(
225-
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, xl=True
253+
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, cms2=cms2
226254
)
227255
self.metrics_val["count_not_observed_and_not_predicted_b_a"] = FragmentsRatio.count_observation_states(
228-
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl=True
256+
observation_state_a,
257+
ObservationState.NOT_OBS_AND_NOT_PRED,
258+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
259+
cms2=cms2,
229260
)
230261
self.metrics_val["count_not_observed_and_not_predicted_b_b"] = FragmentsRatio.count_observation_states(
231-
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.B_ION_MASK_XL, xl=True
262+
observation_state_b,
263+
ObservationState.NOT_OBS_AND_NOT_PRED,
264+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
265+
cms2=cms2,
232266
)
233267
self.metrics_val["count_not_observed_and_not_predicted_y_a"] = FragmentsRatio.count_observation_states(
234-
observation_state_a, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
268+
observation_state_a,
269+
ObservationState.NOT_OBS_AND_NOT_PRED,
270+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
271+
cms2=cms2,
235272
)
236273
self.metrics_val["count_not_observed_and_not_predicted_y_b"] = FragmentsRatio.count_observation_states(
237-
observation_state_b, ObservationState.NOT_OBS_AND_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
274+
observation_state_b,
275+
ObservationState.NOT_OBS_AND_NOT_PRED,
276+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
277+
cms2=cms2,
238278
)
239279
self.metrics_val["count_observed_but_not_predicted_a"] = FragmentsRatio.count_observation_states(
240-
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, xl=True
280+
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, cms2=cms2
241281
)
242282
self.metrics_val["count_observed_but_not_predicted_b"] = FragmentsRatio.count_observation_states(
243-
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, xl=True
283+
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, cms2=cms2
244284
)
245285
self.metrics_val["count_observed_but_not_predicted_b_a"] = FragmentsRatio.count_observation_states(
246-
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl=True
286+
observation_state_a,
287+
ObservationState.OBS_BUT_NOT_PRED,
288+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
289+
cms2=cms2,
247290
)
248291
self.metrics_val["count_observed_but_not_predicted_b_b"] = FragmentsRatio.count_observation_states(
249-
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.B_ION_MASK_XL, xl=True
292+
observation_state_b,
293+
ObservationState.OBS_BUT_NOT_PRED,
294+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
295+
cms2=cms2,
250296
)
251297
self.metrics_val["count_observed_but_not_predicted_y_a"] = FragmentsRatio.count_observation_states(
252-
observation_state_a, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
298+
observation_state_a,
299+
ObservationState.OBS_BUT_NOT_PRED,
300+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
301+
cms2=cms2,
253302
)
254303
self.metrics_val["count_observed_but_not_predicted_y_b"] = FragmentsRatio.count_observation_states(
255-
observation_state_b, ObservationState.OBS_BUT_NOT_PRED, constants.Y_ION_MASK_XL, xl=True
304+
observation_state_b,
305+
ObservationState.OBS_BUT_NOT_PRED,
306+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
307+
cms2=cms2,
256308
)
257309
self.metrics_val["count_not_observed_but_predicted_a"] = FragmentsRatio.count_observation_states(
258-
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, xl=True
310+
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, cms2=cms2
259311
)
260312
self.metrics_val["count_not_observed_but_predicted_b"] = FragmentsRatio.count_observation_states(
261-
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, xl=True
313+
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, cms2=cms2
262314
)
263315
self.metrics_val["count_not_observed_but_predicted_b_a"] = FragmentsRatio.count_observation_states(
264-
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl=True
316+
observation_state_a,
317+
ObservationState.NOT_OBS_BUT_PRED,
318+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
319+
cms2=cms2,
265320
)
266321
self.metrics_val["count_not_observed_but_predicted_b_b"] = FragmentsRatio.count_observation_states(
267-
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.B_ION_MASK_XL, xl=True
322+
observation_state_b,
323+
ObservationState.NOT_OBS_BUT_PRED,
324+
constants.B_ION_MASK_XL if cms2 else constants.B_ION_MASK,
325+
cms2=cms2,
268326
)
269327
self.metrics_val["count_not_observed_but_predicted_y_a"] = FragmentsRatio.count_observation_states(
270-
observation_state_a, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl=True
328+
observation_state_a,
329+
ObservationState.NOT_OBS_BUT_PRED,
330+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
331+
cms2=cms2,
271332
)
272333
self.metrics_val["count_not_observed_but_predicted_y_b"] = FragmentsRatio.count_observation_states(
273-
observation_state_b, ObservationState.NOT_OBS_BUT_PRED, constants.Y_ION_MASK_XL, xl=True
334+
observation_state_b,
335+
ObservationState.NOT_OBS_BUT_PRED,
336+
constants.Y_ION_MASK_XL if cms2 else constants.Y_ION_MASK,
337+
cms2=cms2,
274338
)
275339
# fractional count metrics
276340
self.metrics_val["fraction_predicted_a"] = self.metrics_val["count_predicted_a"].values / valid_ions_a

spectrum_fundamentals/metrics/percolator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def __init__(
6060
additional_columns: Optional[Union[str, list]] = None,
6161
neutral_loss_flag: Optional[bool] = False,
6262
drop_miss_cleavage_flag: Optional[bool] = False,
63+
cms2: bool = False,
6364
):
6465
"""Initialize a Percolator obj."""
6566
super().__init__(pred_intensities, true_intensities, mz, "CROSSLINKER_TYPE" in metadata.columns)
@@ -72,6 +73,7 @@ def __init__(
7273
self.fdr_cutoff = fdr_cutoff
7374
self.neutral_loss_flag = neutral_loss_flag
7475
self.drop_miss_cleavage_flag = drop_miss_cleavage_flag
76+
self.cms2 = cms2
7577

7678
self.base_columns = [
7779
"raw_file",
@@ -456,9 +458,9 @@ def calc(self): # noqa: C901
456458
# add additional features
457459
self.add_additional_features()
458460
fragments_ratio = fr.FragmentsRatio(self.pred_intensities, self.true_intensities)
459-
fragments_ratio.calc(xl=self.xl)
461+
fragments_ratio.calc(xl=self.xl, cms2=self.cms2)
460462
similarity = sim.SimilarityMetrics(self.pred_intensities, self.true_intensities, self.mz)
461-
similarity.calc(self.all_features_flag, xl=self.xl)
463+
similarity.calc(self.all_features_flag, xl=self.xl, cms2=self.cms2)
462464

463465
self.metrics_val = pd.concat(
464466
[self.metrics_val, fragments_ratio.metrics_val, similarity.metrics_val], axis=1

0 commit comments

Comments
 (0)