Skip to content

Commit 340e294

Browse files
authored
Merge pull request #101 from Arslan-Siraj/OpenMS_support
initiate OpenMS support
2 parents 728b111 + 4043fae commit 340e294

File tree

2 files changed

+55
-1
lines changed

2 files changed

+55
-1
lines changed

spectrum_fundamentals/constants.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,15 @@
134134
"dsbu": "[UNIMOD:1884]",
135135
}
136136

137+
#######################
138+
# OpenMS constants #
139+
#######################
140+
141+
# Maps the modification notation found in OpenMS sequences (residue followed by
# the modification name in parentheses) to the bracketed UNIMOD notation.
OPENMS_VAR_MODS = {
    "M(Oxidation)": "M[UNIMOD:35]",
    "C(Carbamidomethyl)": "C[UNIMOD:4]",
}
145+
137146
####################
138147
# MASS CALCULATION #
139148
####################

spectrum_fundamentals/mod_string.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import numpy as np
77
import pandas as pd
88

9-
from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS
9+
from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS, OPENMS_VAR_MODS
1010

1111

1212
def sage_to_internal(sequences: List[str], mods: Dict[str, str]) -> List[str]:
@@ -188,6 +188,51 @@ def msfragger_to_internal(sequences: Union[np.ndarray, pd.Series, List[str]], mo
188188
"""
189189
return _to_internal(sequences=sequences, mods=mods)
190190

191+
def openms_to_internal(sequences: List[str], fixed_mods: Optional[Dict[str, str]] = None) -> List[str]:
    """
    Function to translate an OpenMS modstring to the Prosit format.

    :param sequences: List[str] of sequences
    :param fixed_mods: Optional dictionary of fixed modifications with key aa and value mod, e.g. 'C': 'C[UNIMOD:4]'.
        Every value must be one of the values of the OpenMS variable modifications dictionary (OPENMS_VAR_MODS).
        By default, i.e. if nothing is supplied to fixed_mods, carbamidomethylation on cysteine will be included
        in the fixed modifications. If you want to have no fixed modifications at all, supply fixed_mods={}
    :raises AssertionError: if illegal modification was provided in the fixed_mods dictionary.
    :return: a list of modified sequences
    """
    if fixed_mods is None:
        fixed_mods = {"C": "C[UNIMOD:4]"}

    supported_mods = set(OPENMS_VAR_MODS.values())
    if not all(mod in supported_mods for mod in fixed_mods.values()):
        # Raised explicitly rather than through an ``assert`` statement so that the
        # validation is not stripped when Python runs with optimizations (-O).
        raise AssertionError(f"Provided illegal fixed mod, supported modifications are {supported_mods}.")

    # Fixed mods are merged last, so they override a variable mod that shares the same key.
    replacements = {**OPENMS_VAR_MODS, **fixed_mods}

    def custom_regex_escape(key: str) -> str:
        """
        Subfunction to escape only normal brackets in the modstring.

        :param key: The match to escape
        :return: match with escaped special characters
        """
        for char, escaped in {"(": r"\(", ")": r"\)"}.items():
            key = key.replace(char, escaped)
        return key

    # Alternatives are tried in dictionary order, so the parenthesised variable mods
    # (e.g. "C(Carbamidomethyl)") are matched before a bare fixed-mod residue such as "C".
    regex = re.compile("|".join(map(custom_regex_escape, replacements.keys())))

    def find_replacement(match: re.Match) -> str:
        """
        Subfunction to find the corresponding substitution for a match.

        :param match: an re.Match object found by re.sub
        :return: substitution string for the given match
        """
        return replacements[match.group()]

    return [regex.sub(find_replacement, seq) for seq in sequences]
191236

192237
def internal_without_mods(sequences: List[str]) -> List[str]:
193238
"""

0 commit comments

Comments
 (0)