|
6 | 6 | import numpy as np |
7 | 7 | import pandas as pd |
8 | 8 |
|
9 | | -from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS |
| 9 | +from .constants import MOD_MASSES, MOD_NAMES, SPECTRONAUT_MODS, XISEARCH_VAR_MODS, OPENMS_VAR_MODS |
10 | 10 |
|
11 | 11 |
|
12 | 12 | def sage_to_internal(sequences: List[str], mods: Dict[str, str]) -> List[str]: |
@@ -188,6 +188,51 @@ def msfragger_to_internal(sequences: Union[np.ndarray, pd.Series, List[str]], mo |
188 | 188 | """ |
189 | 189 | return _to_internal(sequences=sequences, mods=mods) |
190 | 190 |
|
def openms_to_internal(sequences: List[str], fixed_mods: Optional[Dict[str, str]] = None) -> List[str]:
    """
    Translate OpenMS modstrings to the Prosit format.

    Every occurrence of a key of the replacement table (the variable modifications in
    OPENMS_VAR_MODS, overlaid with fixed_mods) is substituted by its mapped value.

    :param sequences: List[str] of sequences
    :param fixed_mods: Optional dictionary of fixed modifications with key aa and value mod,
        e.g. 'C': 'C[UNIMOD:4]'. Every value must also occur as a value of the variable
        modifications dictionary. By default, i.e. if nothing is supplied to fixed_mods,
        carbamidomethylation on cysteine will be included in the fixed modifications.
        If you want to have no fixed modifications at all, supply fixed_mods={}
    :raises AssertionError: if illegal modification was provided in the fixed_mods dictionary.
    :return: a list of modified sequences
    """
    if fixed_mods is None:
        fixed_mods = {"C": "C[UNIMOD:4]"}

    # Raise explicitly instead of using an `assert` statement: asserts are removed when
    # Python runs with -O, which would silently disable this validation. The exception
    # type stays AssertionError so existing callers keep working.
    supported_mods = OPENMS_VAR_MODS.values()
    if not all(mod in supported_mods for mod in fixed_mods.values()):
        raise AssertionError(
            f"Provided illegal fixed mod, supported modifications are {set(OPENMS_VAR_MODS.values())}."
        )

    # Fixed mods are merged last so they override variable mods sharing the same key.
    replacements = {**OPENMS_VAR_MODS, **fixed_mods}
    if not replacements:
        # An empty alternation would compile to a pattern matching the empty string at
        # every position, for which no replacement exists; nothing to translate here.
        return list(sequences)

    def custom_regex_escape(key: str) -> str:
        """
        Subfunction to escape only normal brackets in the modstring.

        :param key: The match to escape
        :return: match with escaped special characters
        """
        return key.replace("(", r"\(").replace(")", r"\)")

    # One alternation over all keys; alternatives are tried in dictionary order.
    regex = re.compile("|".join(map(custom_regex_escape, replacements)))

    def find_replacement(match: re.Match) -> str:
        """
        Subfunction to find the corresponding substitution for a match.

        :param match: an re.Match object found by re.sub
        :return: substitution string for the given match
        """
        return replacements[match.group(0)]

    return [regex.sub(find_replacement, seq) for seq in sequences]
191 | 236 |
|
192 | 237 | def internal_without_mods(sequences: List[str]) -> List[str]: |
193 | 238 | """ |
|
0 commit comments