Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@
"dsbu": "[UNIMOD:1884]",
}

#######################
#   OpenMS constants  #
#######################

# Maps an OpenMS-style residue annotation, written as "<aa>(<modification name>)",
# to the same residue in UNIMOD bracket notation, written as "<aa>[UNIMOD:<id>]".
OPENMS_VAR_MODS = {
    # oxidized methionine
    "M(Oxidation)": "M[UNIMOD:35]",
    # carbamidomethylated cysteine
    "C(Carbamidomethyl)": "C[UNIMOD:4]",
}

####################
# MASS CALCULATION #
####################
Expand Down
47 changes: 46 additions & 1 deletion spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pandas as pd

from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS
from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS, OPENMS_VAR_MODS


def sage_to_internal(sequences: List[str], mods: Dict[str, str]) -> List[str]:
Expand Down Expand Up @@ -188,6 +188,51 @@ def msfragger_to_internal(sequences: Union[np.ndarray, pd.Series, List[str]], mo
"""
return _to_internal(sequences=sequences, mods=mods)

def openms_to_internal(sequences: List[str], fixed_mods: Optional[Dict[str, str]] = None) -> List[str]:
    """
    Function to translate an OpenMS modstring to the Prosit format.

    Every occurrence of a key of OPENMS_VAR_MODS or of fixed_mods inside a sequence is replaced by the
    corresponding value; all other characters of the sequence are left untouched.

    :param sequences: List[str] of sequences
    :param fixed_mods: Optional dictionary of modifications with key aa and value mod, e.g. 'C': 'C[UNIMOD:4]'.
        Fixed modifications must be included in the variable modifications dictionary.
        By default, i.e. if nothing is supplied to fixed_mods, carbamidomethylation on cysteine will be included
        in the fixed modifications. If you want to have no fixed modifications at all, supply fixed_mods={}
    :raises AssertionError: if illegal modification was provided in the fixed_mods dictionary.
    :return: a list of modified sequences
    """
    if fixed_mods is None:
        fixed_mods = {"C": "C[UNIMOD:4]"}

    # Raise explicitly instead of using an `assert` statement: asserts are removed when Python runs
    # with -O, which would let unsupported fixed modifications through silently.
    supported_mods = OPENMS_VAR_MODS.values()
    if any(mod not in supported_mods for mod in fixed_mods.values()):
        raise AssertionError(
            f"Provided illegal fixed mod, supported modifications are {set(OPENMS_VAR_MODS.values())}."
        )

    # Fixed modifications take precedence over a variable modification with the same key.
    replacements = {**OPENMS_VAR_MODS, **fixed_mods}

    # Regex alternation picks the first alternative that matches at a position, so longer keys have to
    # come first; otherwise a bare residue key such as "C" could shadow "C(Carbamidomethyl)".
    # re.escape makes every key match literally, whatever characters it contains.
    keys_longest_first = sorted(replacements, key=len, reverse=True)
    regex = re.compile("|".join(re.escape(key) for key in keys_longest_first))

    def find_replacement(match: re.Match) -> str:
        """
        Subfunction to find the corresponding substitution for a match.

        :param match: an re.Match object found by re.sub
        :return: substitution string for the given match
        """
        return replacements[match.group(0)]

    return [regex.sub(find_replacement, seq) for seq in sequences]

def internal_without_mods(sequences: List[str]) -> List[str]:
"""
Expand Down
Loading