From b668fc3e941a0b5c3b48ae6771de78238e63bc2c Mon Sep 17 00:00:00 2001 From: Tom Eulenfeld Date: Mon, 28 Oct 2024 01:01:51 +0100 Subject: [PATCH] refactor tests, add more tests, refactor str property --- sugar/core/cane.py | 3 +- sugar/core/fts.py | 2 +- sugar/core/seq.py | 68 ++++++++++++----------- sugar/data/__init__.py | 4 +- sugar/tests/test_core_cane.py | 46 +++++++++++++++- sugar/tests/test_core_seq.py | 101 +++++++++++++++++++++------------- sugar/tests/test_docs.py | 6 +- sugar/tests/test_io.py | 12 ++++ 8 files changed, 167 insertions(+), 75 deletions(-) diff --git a/sugar/core/cane.py b/sugar/core/cane.py index af9d337..9c116a5 100644 --- a/sugar/core/cane.py +++ b/sugar/core/cane.py @@ -64,7 +64,7 @@ def _sorted(objs, keys=None, reverse=False, attr=None): return objs -def _filter(objs, attr='meta', **kwargs): +def _filter(objs2, attr='meta', **kwargs): """ Filter objects, used by several objects in sugar.core @@ -84,6 +84,7 @@ def _filter(objs, attr='meta', **kwargs): 'min': operator.ge, 'in': lambda a, b: a in b} allowed_funcs = {'len': len} + objs = objs2 getv = lambda obj, key: (allowed_funcs[key](obj) if key in allowed_funcs else getattr(obj, key, None) if attr is None else getattr(getattr(obj, attr), key, None)) diff --git a/sugar/core/fts.py b/sugar/core/fts.py index 7b12931..9e4f3ca 100644 --- a/sugar/core/fts.py +++ b/sugar/core/fts.py @@ -313,7 +313,7 @@ def name(self, value): def __repr__(self): """Represent Feature as a string for debugging.""" - return f"Feature('{self.type}', [{', '.join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})" + return f'Feature("{self.type}", [{", ".join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})' @property def loc_range(self): diff --git a/sugar/core/seq.py b/sugar/core/seq.py index e5bf780..68dcdf3 100644 --- a/sugar/core/seq.py +++ b/sugar/core/seq.py @@ -41,11 +41,6 @@ class _BioSeqStr(): def __init__(self, parent): self.__parent = parent - def __deepcopy__(self, orig): - # TODO test - return self - - def center(self, width, *args): self.__parent.data = self.__parent.data.center(width, *args) return self.__parent @@ -255,16 +250,6 @@ class BioSeq(): """ def __init__(self, data, id='', meta=None, type=None): - #: Namespace holding all available string methods, - #: see `_BioSeqStr` for available methods - #: and `python:str` for documentation of the methods - #: - #: .. rubric:: Example: - #: - #: >>> seq = read()[0] - #: >>> seq.str.find('ATG') # Use string method - #: 30 - self.str = _BioSeqStr(self) #: Property holding the data string self.data = str(data).upper() if hasattr(data, 'meta'): @@ -299,7 +284,7 @@ def __str__(self): def __repr__(self): metastr = ', '.join(f'{prop}={repr(val)}' for prop, val in vars(self.meta).items()) - return f'{type(self).__name__}([{repr(self.data)}, meta=dict({metastr}))' + return f'{type(self).__name__}({repr(self.data)}, meta=dict({metastr}))' def __eq__(self, string): if isinstance(string, BioSeq): @@ -349,6 +334,21 @@ def __iadd__(self, other): def __radd__(self, other): return self.__class__(str(other) + self.data, meta=self.meta) + @property + def str(self): + """ + Namespace holding all available string methods, + see `_BioSeqStr` for available methods + and `python:str` for documentation of the methods + + .. rubric:: Example: + + >>> seq = read()[0] + >>> seq.str.find('ATG') # Use string method + 30 + """ + return _BioSeqStr(self) + @property def id(self): """Alias for ``BioSeq.meta.id``""" @@ -383,7 +383,7 @@ def add_fts(self, fts): :param fts: features to add """ - self.fts = self.fts + fts + self.fts = self.fts + FeatureList(fts) self.fts.sort() @property @@ -732,20 +732,6 @@ class BioBasket(collections.UserList): attribute. """ def __init__(self, data=None, meta=None): - # Documentation for str attribute: - #: Namespace holding all available string methods, - #: - #: The `BioBasket.str` methods call the corresponding `BioSeq.str` methods under the hood - #: and return either the altered `BioBasket` object or a list with results. - #: See `_BioSeqStr` for available methods - #: and `python:str` for documentation of the methods - #: - #: .. rubric:: Example: - #: - #: >>> seqs = read() - #: >>> seqs.str.find('ATG') # Use string method - #: [30, 12] - self.str = _BioBasketStr(self) if data is None: data = [] if hasattr(data, 'meta'): @@ -766,6 +752,24 @@ def __eq__(self, other): return self.data == other.data and self.meta == other.meta return self.data == other + @property + def str(self): + """ + Namespace holding all available string methods. + + The `BioBasket.str` methods call the corresponding `BioSeq.str` methods under the hood + and return either the altered `BioBasket` object or a list with results. + See `_BioSeqStr` for available methods + and `python:str` for documentation of the methods. + + .. rubric:: Example: + + >>> seqs = read() + >>> seqs.str.find('ATG') # Use string method + [30, 12] + """ + return _BioBasketStr(self) + @property def ids(self): """List of sequence ids""" @@ -805,7 +809,7 @@ def add_fts(self, fts): for seq in self: if seq.id in fts: seq.fts = seq.fts + fts.pop(seq.id) - seq.meta.fts.sort() + seq.fts.sort() if len(fts) > 0: missing_ids = ', '.join(fts.keys()) warn(f'Features for seqids {missing_ids} could not be ' diff --git a/sugar/data/__init__.py b/sugar/data/__init__.py index 7872987..f307ddc 100644 --- a/sugar/data/__init__.py +++ b/sugar/data/__init__.py @@ -65,8 +65,9 @@ def _submat_files(): @lru_cache def submat(fname): """ - Return substition matrix as a dict of dicts + Return substitution matrix as a dict of dicts + >>> from sugar.data import submat >>> bl = submat('blosum62') >>> bl['A']['A'] 4 @@ -123,6 +124,7 @@ def gcode(tt=1): :param tt: number of the translation table (default: 1) + >>> from sugar.data import gcode >>> gc = gcode() >>> gc.tt['TAG'] '*' diff --git a/sugar/tests/test_core_cane.py b/sugar/tests/test_core_cane.py index 71076ff..78eed5c 100644 --- a/sugar/tests/test_core_cane.py +++ b/sugar/tests/test_core_cane.py @@ -1,7 +1,7 @@ # (C) 2024, Tom Eulenfeld, MIT license import pytest -from sugar import read, read_fts +from sugar import read, read_fts, BioSeq, BioBasket from sugar.core.cane import translate @@ -38,6 +38,45 @@ def test_translate_final_stop(): # TODO more translation tests +def test_match(): + seq = BioSeq('NNNUAGDDDUAGAUG') + seqs = BioBasket([seq]) + seq2 = BioSeq('-UU-U-AG') + assert seq.match('stop').start() == 3 + assert seq.match('start').end() == len(seq) + matches = seq.matchall('stop') + assert matches[0].span() == seq.match('stop').span() + assert len(matches) == 2 + assert seqs.match('stop')[0].start() == 3 + matches = seq2.matchall('stop', gap=None) + assert seqs.matchall('stop')[0].start() == 3 + assert len(matches) == 0 + match = seq2.match('stop', gap='-') + assert match.group() == 'U-AG' + assert seq2.match('stop', gap='-', rf=1) == None + assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG' + assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG' + assert seq2.match('stop', gap='-', rf=(0, 1)) == None + seq3 = seq2.copy().rc() + match3 = seq3.match('stop', gap='-', rf='bwd') + assert match.span() == match3._match.span() + assert match.span() != match3.span() + + +def test_orf(): + seqs=read() + orfs = seqs[0].find_orfs() + assert len(orfs) > 0 + longest_orf = orfs.sort(len)[-1] + assert seqs[0][longest_orf] == seqs[0]['cds'] + + orfs2 = seqs[0].find_orfs(rf='both') + assert len(orfs2) > len(orfs) + + orfs = seqs.find_orfs() + for id_ in seqs.ids: + assert seqs.d[id_][orfs.d[id_].sort(len)[-1]] == seqs.d[id_]['cds'] + def test_filter_fts(): fts = read_fts() @@ -54,6 +93,11 @@ def test_filter_seqs(): seqs = read() seqs.filter(len_gt=9500) assert len(seqs) == 1 + seqs = read() + seqs2 = seqs.filter(len_gt=9500, inplace=False) + assert len(seqs2) == 1 + assert len(seqs2) < len(seqs) + def test_groupby_fts_nested(): diff --git a/sugar/tests/test_core_seq.py b/sugar/tests/test_core_seq.py index fa3cab2..8c099ac 100644 --- a/sugar/tests/test_core_seq.py +++ b/sugar/tests/test_core_seq.py @@ -5,10 +5,17 @@ from sugar.tests.util import tempfilename +def test_siformat(): + from sugar.core.seq import _si_format + assert _si_format(10000) == '10k' + assert _si_format(0) == '0' + + def test_attr(): assert Attr(a=1) == Attr(a=1) assert Attr(a=1) != Attr(a=2) + def test_bioseq_equal(): s1 = BioSeq('bla', id='5') s2 = BioSeq('bla', id='5') @@ -73,6 +80,11 @@ def test_copy(): assert seq.copy()[10:] != seq assert len(seq.copy()[10:]) == n - 10 assert seq.copy() == seq + seqs = read() + seqs2 = seqs.copy() + assert seqs2 == seqs + seqs2[0].data = 'NNN' + assert seqs2 != seqs def test_countall(): @@ -102,10 +114,22 @@ def test_meta_str(): assert 'CDS' in str(meta) +def test_biobasket_str(): + seqs = read() + seqs2 = seqs.copy() + seqs2.data = [] + assert str(seqs2).startswith('0 seq') + seqs2 = seqs.copy() + seqs.data = 10 * seqs.data + assert '...' in str(seqs2) + + def test_shortcuts(): seq = read()[0] assert seq.id == seq.meta.id assert seq.fts == seq.meta.fts + seq.id = 'XXX' + assert seq.id == seq.meta.id def test_getitem(): @@ -140,6 +164,13 @@ def test_getitem(): # assert seqs[0][3:6].meta.features[0].orig_len == 4 # assert len(seqs[0][10:20].meta.features) == 0 + ## TODO!!! + + +def test_sl_slicable_inplace(): + seqs = read() + assert seqs.sl()[:1] == seqs[:1] + def test_setitem(): seqs = read() @@ -150,6 +181,9 @@ def test_setitem(): seqs[0] = 'ABC' assert isinstance(seqs[0], BioSeq) assert seqs[0] == 'ABC' + seqs = read() + seqs[:2] = ['AGT', 'TGA'] + assert str(seqs[0]) == 'AGT' def test_add_fts(): @@ -167,41 +201,34 @@ def test_add_fts(): assert seq.fts[1] == ft assert seq.fts[-1] != ft + ft = seqs.fts[0] + ft.seqid = 'unknown' + with pytest.warns(UserWarning, match='.*unknown'): + seqs.add_fts([ft]) + with pytest.warns(UserWarning, match='.*unknown'): + seqs.fts = [ft] + with pytest.warns(UserWarning, match='.*mismatch'): + seqs[0].add_fts([ft]) + + +def test_biobasket_rc(): + seqs = read() + seqs2 = seqs.copy().rc() + assert seqs[0].rc() == seqs2[0] + + +def test_repr(): + from sugar import Location, Meta + seqs = read() + assert eval(repr(seqs[0])) == seqs[0] + assert eval(repr(seqs)) == seqs + + +def test_magic_methods(): + # TODO + pass + -def test_match(): - seq = BioSeq('NNNUAGDDDUAGAUG') - seqs = BioBasket([seq]) - seq2 = BioSeq('-UU-U-AG') - assert seq.match('stop').start() == 3 - assert seq.match('start').end() == len(seq) - matches = seq.matchall('stop') - assert matches[0].span() == seq.match('stop').span() - assert len(matches) == 2 - assert seqs.match('stop')[0].start() == 3 - matches = seq2.matchall('stop', gap=None) - assert len(matches) == 0 - match = seq2.match('stop', gap='-') - assert match.group() == 'U-AG' - assert seq2.match('stop', gap='-', rf=1) == None - assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG' - assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG' - assert seq2.match('stop', gap='-', rf=(0, 1)) == None - seq3 = seq2.copy().rc() - match3 = seq3.match('stop', gap='-', rf='bwd') - assert match.span() == match3._match.span() - assert match.span() != match3.span() - - -def test_orf(): - seqs=read() - orfs = seqs[0].find_orfs() - assert len(orfs) > 0 - longest_orf = orfs.sort(len)[-1] - assert seqs[0][longest_orf] == seqs[0]['cds'] - - orfs2 = seqs[0].find_orfs(rf='both') - assert len(orfs2) > len(orfs) - - orfs = seqs.find_orfs() - for id_ in seqs.ids: - assert seqs.d[id_][orfs.d[id_].sort(len)[-1]] == seqs.d[id_]['cds'] +def test_str_methods(): + # TODO + pass diff --git a/sugar/tests/test_docs.py b/sugar/tests/test_docs.py index 71a52da..2061156 100644 --- a/sugar/tests/test_docs.py +++ b/sugar/tests/test_docs.py @@ -2,20 +2,22 @@ from contextlib import redirect_stdout import io +from sugar import read, read_fts def doctest_module(m): from doctest import testmod, ELLIPSIS raised = False flags = ELLIPSIS + globs = {'read': read, 'read_fts': read_fts} try: - testmod(m, raise_on_error=True, optionflags=flags) + testmod(m, raise_on_error=True, optionflags=flags, extraglobs=globs) except Exception: raised = True if raised: report = io.StringIO() with redirect_stdout(report): - testmod(m, optionflags=flags, report=True) + testmod(m, optionflags=flags, report=True, extraglobs=globs) assert report.getvalue() == '' diff --git a/sugar/tests/test_io.py b/sugar/tests/test_io.py index 11889a4..101e0f6 100644 --- a/sugar/tests/test_io.py +++ b/sugar/tests/test_io.py @@ -69,6 +69,18 @@ def test_io_file(): assert str(seq2) == str(seq1) +def test_write_fmtstr_seq(): + seqs = read() + with tempfile.NamedTemporaryFile() as f: + seqs[0].write(f.name, 'fasta') + f.seek(0) + seqs2 = read(f.name) + assert str(seqs2[0]) == str(seqs[0]) + s = seqs[0].tofmtstr('fasta') + seqs2 = seqs.fromfmtstr(s) + assert str(seqs2[0]) == str(seqs[0]) + + def test_io_fmtstr(): seqs = read() for fmt in TESTIOFMTS: