Skip to content

Commit

Permalink
refactor tests, add more tests, refactor str property
Browse files Browse the repository at this point in the history
  • Loading branch information
trichter committed Oct 28, 2024
1 parent 856f335 commit b668fc3
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 75 deletions.
3 changes: 2 additions & 1 deletion sugar/core/cane.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def _sorted(objs, keys=None, reverse=False, attr=None):
return objs


def _filter(objs, attr='meta', **kwargs):
def _filter(objs2, attr='meta', **kwargs):
"""
Filter objects, used by several objects in sugar.core
Expand All @@ -84,6 +84,7 @@ def _filter(objs, attr='meta', **kwargs):
'min': operator.ge,
'in': lambda a, b: a in b}
allowed_funcs = {'len': len}
objs = objs2
getv = lambda obj, key: (allowed_funcs[key](obj) if key in allowed_funcs else
getattr(obj, key, None) if attr is None else
getattr(getattr(obj, attr), key, None))
Expand Down
2 changes: 1 addition & 1 deletion sugar/core/fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,7 @@ def name(self, value):

def __repr__(self):
"""Represent Feature as a string for debugging."""
return f"Feature('{self.type}', [{', '.join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})"
return f'Feature("{self.type}", [{", ".join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})'

@property
def loc_range(self):
Expand Down
68 changes: 36 additions & 32 deletions sugar/core/seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,6 @@ class _BioSeqStr():
def __init__(self, parent):
self.__parent = parent

def __deepcopy__(self, orig):
# TODO test
return self


def center(self, width, *args):
self.__parent.data = self.__parent.data.center(width, *args)
return self.__parent
Expand Down Expand Up @@ -255,16 +250,6 @@ class BioSeq():
"""

def __init__(self, data, id='', meta=None, type=None):
#: Namespace holding all available string methods,
#: see `_BioSeqStr` for available methods
#: and `python:str` for documentation of the methods
#:
#: .. rubric:: Example:
#:
#: >>> seq = read()[0]
#: >>> seq.str.find('ATG') # Use string method
#: 30
self.str = _BioSeqStr(self)
#: Property holding the data string
self.data = str(data).upper()
if hasattr(data, 'meta'):
Expand Down Expand Up @@ -299,7 +284,7 @@ def __str__(self):

def __repr__(self):
metastr = ', '.join(f'{prop}={repr(val)}' for prop, val in vars(self.meta).items())
return f'{type(self).__name__}([{repr(self.data)}, meta=dict({metastr}))'
return f'{type(self).__name__}({repr(self.data)}, meta=dict({metastr}))'

def __eq__(self, string):
if isinstance(string, BioSeq):
Expand Down Expand Up @@ -349,6 +334,21 @@ def __iadd__(self, other):
def __radd__(self, other):
    """
    Support ``other + seq`` (reflected addition).

    :param other: left-hand operand; it is converted with ``str()``
    :return: new object of the same class whose data is ``str(other)``
        followed by ``self.data``; ``self.meta`` is passed on as ``meta``
    """
    combined = str(other) + self.data
    cls = self.__class__
    return cls(combined, meta=self.meta)

@property
def str(self):
    """
    Namespace holding all available string methods

    See `_BioSeqStr` for available methods
    and `python:str` for documentation of the methods.
    A new `_BioSeqStr` wrapper around this object is created on each access.

    .. rubric:: Example:

    >>> seq = read()[0]
    >>> seq.str.find('ATG')  # Use string method
    30
    """
    namespace = _BioSeqStr(self)
    return namespace

@property
def id(self):
"""Alias for ``BioSeq.meta.id``"""
Expand Down Expand Up @@ -383,7 +383,7 @@ def add_fts(self, fts):
:param fts: features to add
"""
self.fts = self.fts + fts
self.fts = self.fts + FeatureList(fts)
self.fts.sort()

@property
Expand Down Expand Up @@ -732,20 +732,6 @@ class BioBasket(collections.UserList):
attribute.
"""
def __init__(self, data=None, meta=None):
# Documentation for str attribute:
#: Namespace holding all available string methods,
#:
#: The `BioBasket.str` methods call the corresponding `BioSeq.str` methods under the hood
#: and return either the altered `BioBasket` object or a list with results.
#: See `_BioSeqStr` for available methods
#: and `python:str` for documentation of the methods
#:
#: .. rubric:: Example:
#:
#: >>> seqs = read()
#: >>> seqs.str.find('ATG') # Use string method
#: [30, 12]
self.str = _BioBasketStr(self)
if data is None:
data = []
if hasattr(data, 'meta'):
Expand All @@ -766,6 +752,24 @@ def __eq__(self, other):
return self.data == other.data and self.meta == other.meta
return self.data == other

@property
def str(self):
    """
    Namespace holding all available string methods

    The `BioBasket.str` methods call the corresponding `BioSeq.str`
    methods under the hood and return either the altered `BioBasket`
    object or a list with results.
    See `_BioSeqStr` for available methods
    and `python:str` for documentation of the methods.
    A new `_BioBasketStr` wrapper around this object is created on each
    access.

    .. rubric:: Example:

    >>> seqs = read()
    >>> seqs.str.find('ATG')  # Use string method
    [30, 12]
    """
    namespace = _BioBasketStr(self)
    return namespace

@property
def ids(self):
"""List of sequence ids"""
Expand Down Expand Up @@ -805,7 +809,7 @@ def add_fts(self, fts):
for seq in self:
if seq.id in fts:
seq.fts = seq.fts + fts.pop(seq.id)
seq.meta.fts.sort()
seq.fts.sort()
if len(fts) > 0:
missing_ids = ', '.join(fts.keys())
warn(f'Features for seqids {missing_ids} could not be '
Expand Down
4 changes: 3 additions & 1 deletion sugar/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ def _submat_files():
@lru_cache
def submat(fname):
"""
Return substition matrix as a dict of dicts
Return substitution matrix as a dict of dicts
>>> from sugar.data import submat
>>> bl = submat('blosum62')
>>> bl['A']['A']
4
Expand Down Expand Up @@ -123,6 +124,7 @@ def gcode(tt=1):
:param tt: number of the translation table (default: 1)
>>> from sugar.data import gcode
>>> gc = gcode()
>>> gc.tt['TAG']
'*'
Expand Down
46 changes: 45 additions & 1 deletion sugar/tests/test_core_cane.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# (C) 2024, Tom Eulenfeld, MIT license
import pytest

from sugar import read, read_fts
from sugar import read, read_fts, BioSeq, BioBasket
from sugar.core.cane import translate


Expand Down Expand Up @@ -38,6 +38,45 @@ def test_translate_final_stop():

# TODO more translation tests

def test_match():
    """Check ``match`` and ``matchall`` on a BioSeq and a BioBasket."""
    seq = BioSeq('NNNUAGDDDUAGAUG')
    seqs = BioBasket([seq])
    seq2 = BioSeq('-UU-U-AG')

    # Un-gapped sequence: first stop codon at index 3, start codon at the end
    assert seq.match('stop').start() == 3
    assert seq.match('start').end() == len(seq)
    matches = seq.matchall('stop')
    assert matches[0].span() == seq.match('stop').span()
    assert len(matches) == 2

    # The basket-level calls expose the same first hit
    assert seqs.match('stop')[0].start() == 3
    assert seqs.matchall('stop')[0].start() == 3

    # Gapped sequence: nothing is found with gap=None,
    # with gap='-' the hit spans the gap characters
    matches = seq2.matchall('stop', gap=None)
    assert len(matches) == 0
    match = seq2.match('stop', gap='-')
    assert match.group() == 'U-AG'

    # NOTE(review): rf presumably selects the reading frame(s) -- confirm
    # against the match() documentation. A missing hit is reported as None,
    # which is checked by identity rather than with ``==``.
    assert seq2.match('stop', gap='-', rf=1) is None
    assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG'
    assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG'
    assert seq2.match('stop', gap='-', rf=(0, 1)) is None

    # Reverse complement searched with rf='bwd': the wrapped ``_match`` has
    # the same span as the forward hit, the public span differs
    seq3 = seq2.copy().rc()
    match3 = seq3.match('stop', gap='-', rf='bwd')
    assert match.span() == match3._match.span()
    assert match.span() != match3.span()


def test_orf():
    """Check ``find_orfs`` on a single sequence and on the whole basket."""
    seqs = read()
    first = seqs[0]

    # Default search: the longest ORF has to coincide with the 'cds' slice
    orfs = first.find_orfs()
    assert len(orfs) > 0
    longest_orf = orfs.sort(len)[-1]
    assert first[longest_orf] == first['cds']

    # rf='both' has to yield more ORFs than the default search
    orfs2 = first.find_orfs(rf='both')
    assert len(orfs2) > len(orfs)

    # Same check as above for every sequence, via the basket-level call
    basket_orfs = seqs.find_orfs()
    for seqid in seqs.ids:
        seq = seqs.d[seqid]
        longest = basket_orfs.d[seqid].sort(len)[-1]
        assert seq[longest] == seq['cds']


def test_filter_fts():
fts = read_fts()
Expand All @@ -54,6 +93,11 @@ def test_filter_seqs():
seqs = read()
seqs.filter(len_gt=9500)
assert len(seqs) == 1
seqs = read()
seqs2 = seqs.filter(len_gt=9500, inplace=False)
assert len(seqs2) == 1
assert len(seqs2) < len(seqs)



def test_groupby_fts_nested():
Expand Down
101 changes: 64 additions & 37 deletions sugar/tests/test_core_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
from sugar.tests.util import tempfilename


def test_siformat():
    """Check the strings returned by ``_si_format`` for 10000 and 0."""
    from sugar.core.seq import _si_format

    expected = {10000: '10k', 0: '0'}
    for number, text in expected.items():
        assert _si_format(number) == text


def test_attr():
    """Attr objects compare equal for equal keywords, unequal otherwise."""
    first = Attr(a=1)
    twin = Attr(a=1)
    different = Attr(a=2)
    assert first == twin
    assert first != different


def test_bioseq_equal():
s1 = BioSeq('bla', id='5')
s2 = BioSeq('bla', id='5')
Expand Down Expand Up @@ -73,6 +80,11 @@ def test_copy():
assert seq.copy()[10:] != seq
assert len(seq.copy()[10:]) == n - 10
assert seq.copy() == seq
seqs = read()
seqs2 = seqs.copy()
assert seqs2 == seqs
seqs2[0].data = 'NNN'
assert seqs2 != seqs


def test_countall():
Expand Down Expand Up @@ -102,10 +114,22 @@ def test_meta_str():
assert 'CDS' in str(meta)


def test_biobasket_str():
    """Check ``str()`` of an empty and of an enlarged BioBasket."""
    seqs = read()

    # Empty basket
    seqs2 = seqs.copy()
    seqs2.data = []
    assert str(seqs2).startswith('0 seq')

    # Enlarged basket: the copy under test has to be enlarged; enlarging
    # ``seqs`` instead would leave the asserted object unchanged
    seqs2 = seqs.copy()
    seqs2.data = 10 * seqs2.data
    assert '...' in str(seqs2)


def test_shortcuts():
    """The ``id`` and ``fts`` shortcuts have to agree with ``meta``."""
    seq = read()[0]
    for name in ('id', 'fts'):
        assert getattr(seq, name) == getattr(seq.meta, name)
    # Assigning through the shortcut must keep both views in agreement
    seq.id = 'XXX'
    assert seq.id == seq.meta.id


def test_getitem():
Expand Down Expand Up @@ -140,6 +164,13 @@ def test_getitem():
# assert seqs[0][3:6].meta.features[0].orig_len == 4
# assert len(seqs[0][10:20].meta.features) == 0

## TODO!!!


def test_sl_slicable_inplace():
    """Slicing the object returned by ``sl()`` equals slicing the basket."""
    seqs = read()
    via_sl = seqs.sl()[:1]
    assert via_sl == seqs[:1]


def test_setitem():
seqs = read()
Expand All @@ -150,6 +181,9 @@ def test_setitem():
seqs[0] = 'ABC'
assert isinstance(seqs[0], BioSeq)
assert seqs[0] == 'ABC'
seqs = read()
seqs[:2] = ['AGT', 'TGA']
assert str(seqs[0]) == 'AGT'


def test_add_fts():
Expand All @@ -167,41 +201,34 @@ def test_add_fts():
assert seq.fts[1] == ft
assert seq.fts[-1] != ft

ft = seqs.fts[0]
ft.seqid = 'unknown'
with pytest.warns(UserWarning, match='.*unknown'):
seqs.add_fts([ft])
with pytest.warns(UserWarning, match='.*unknown'):
seqs.fts = [ft]
with pytest.warns(UserWarning, match='.*mismatch'):
seqs[0].add_fts([ft])


def test_biobasket_rc():
    """``rc()`` on a basket copy has to match ``rc()`` on its first sequence."""
    seqs = read()
    duplicate = seqs.copy()
    seqs2 = duplicate.rc()
    first_rc = seqs[0].rc()
    assert first_rc == seqs2[0]


def test_repr():
    """``eval(repr(obj))`` has to reproduce a sequence and a whole basket."""
    # These names are only used inside eval(); presumably the repr output
    # refers to them -- they must stay in this function's scope.
    from sugar import Location, Meta

    seqs = read()
    # Plain for loop on purpose: eval() resolves names in the current scope,
    # a comprehension would hide the local imports from it
    for obj in (seqs[0], seqs):
        assert eval(repr(obj)) == obj


def test_magic_methods():
    """Placeholder, no checks are implemented yet."""
    # TODO
    return None


def test_match():
seq = BioSeq('NNNUAGDDDUAGAUG')
seqs = BioBasket([seq])
seq2 = BioSeq('-UU-U-AG')
assert seq.match('stop').start() == 3
assert seq.match('start').end() == len(seq)
matches = seq.matchall('stop')
assert matches[0].span() == seq.match('stop').span()
assert len(matches) == 2
assert seqs.match('stop')[0].start() == 3
matches = seq2.matchall('stop', gap=None)
assert len(matches) == 0
match = seq2.match('stop', gap='-')
assert match.group() == 'U-AG'
assert seq2.match('stop', gap='-', rf=1) == None
assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG'
assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG'
assert seq2.match('stop', gap='-', rf=(0, 1)) == None
seq3 = seq2.copy().rc()
match3 = seq3.match('stop', gap='-', rf='bwd')
assert match.span() == match3._match.span()
assert match.span() != match3.span()


def test_orf():
seqs=read()
orfs = seqs[0].find_orfs()
assert len(orfs) > 0
longest_orf = orfs.sort(len)[-1]
assert seqs[0][longest_orf] == seqs[0]['cds']

orfs2 = seqs[0].find_orfs(rf='both')
assert len(orfs2) > len(orfs)

orfs = seqs.find_orfs()
for id_ in seqs.ids:
assert seqs.d[id_][orfs.d[id_].sort(len)[-1]] == seqs.d[id_]['cds']
def test_str_methods():
    """Placeholder, no checks are implemented yet."""
    # TODO
    return None
Loading

0 comments on commit b668fc3

Please sign in to comment.