Skip to content

Commit 3babf78

Browse files
committed
added get_all_tokens method
1 parent 04bec44 commit 3babf78

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

spectrum_fundamentals/mod_string.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import difflib
22
import re
33
from itertools import combinations, repeat
4-
from typing import Dict, List, Optional, Tuple, Union
4+
from typing import Dict, List, Optional, Set, Tuple, Union
55

66
import numpy as np
77
import pandas as pd
@@ -342,6 +342,15 @@ def split_modstring(sequence: str, r_pattern):
342342
return map(split_modstring, sequences, repeat(regex_pattern))
343343

344344

345+
def get_all_tokens(sequences: List[str]) -> Set[str]:
    """
    Parse given sequences in UNIMOD ProForma standard into a set of all tokens.

    A token is one of the 20 standard amino acid letters, optionally followed directly by a single
    ``[UNIMOD:<digits>]`` tag (e.g. ``"C"`` or ``"C[UNIMOD:4]"``). Characters that do not belong to a
    token are skipped silently.

    Bracketed tags that are not attached to a residue (e.g. a leading ``[UNIMOD:1]-``) are consumed as a
    unit and not reported, so that the letters N, I, M and D inside the word "UNIMOD" are never mistaken
    for residues.

    :param sequences: modified peptide sequences to tokenize
    :return: set of distinct tokens found across all sequences; empty if nothing matches
    """
    # First alternative: a residue with its optional UNIMOD tag (the token we want).
    # Second alternative: any other closed bracket expression, matched only to skip over its content.
    # Compiled once because the pattern is invariant across the loop.
    token_regex = re.compile(r"(?P<token>[ACDEFGHIKLMNPQRSTVWY](?:\[UNIMOD:\d+\])?)|\[[^\]]*\]")
    tokens: Set[str] = set()
    for seq in sequences:
        for match in token_regex.finditer(seq):
            token = match.group("token")
            # token is None when the match was a standalone bracketed tag.
            if token is not None:
                tokens.add(token)
    return tokens
352+
353+
345354
def add_permutations(modified_sequence: str, unimod_id: int, residues: List[str]):
346355
"""
347356
Generate different peptide sequences with moving the modification to all possible residues.

tests/unit_tests/test_mod_string.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,13 @@ def test_parse_modstrings_invalid_with_filtering(self):
309309
invalid_seq = "testing"
310310
self.assertEqual(next(mod.parse_modstrings([invalid_seq], alphabet=c.ALPHABET, filter=True)), [0])
311311

312+
def test_get_all_tokens(self):
    """Check that UNIMOD sequences are broken down into the expected set of unique tokens."""
    sequences = ["ACKC[UNIMOD:4]AD", "PEPTIDE", "PEM[UNIMOD:35]"]
    expected_tokens = {"A", "C", "C[UNIMOD:4]", "D", "E", "I", "K", "M[UNIMOD:35]", "P", "T"}

    self.assertEqual(mod.get_all_tokens(sequences), expected_tokens)
318+
312319

313320
class TestCustomToInternal(unittest.TestCase):
314321
"""Class to test custom to internal."""

0 commit comments

Comments
 (0)