From b668fc3e941a0b5c3b48ae6771de78238e63bc2c Mon Sep 17 00:00:00 2001
From: Tom Eulenfeld <tom.eulenfeld@uni-jena.de>
Date: Mon, 28 Oct 2024 01:01:51 +0100
Subject: [PATCH] refactor tests, add more tests, refactor str property

---
 sugar/core/cane.py            |   3 +-
 sugar/core/fts.py             |   2 +-
 sugar/core/seq.py             |  68 ++++++++++++-----------
 sugar/data/__init__.py        |   4 +-
 sugar/tests/test_core_cane.py |  46 +++++++++++++++-
 sugar/tests/test_core_seq.py  | 101 +++++++++++++++++++++-------------
 sugar/tests/test_docs.py      |   6 +-
 sugar/tests/test_io.py        |  12 ++++
 8 files changed, 167 insertions(+), 75 deletions(-)

diff --git a/sugar/core/cane.py b/sugar/core/cane.py
index af9d337..9c116a5 100644
--- a/sugar/core/cane.py
+++ b/sugar/core/cane.py
@@ -64,7 +64,7 @@ def _sorted(objs, keys=None, reverse=False, attr=None):
     return objs
 
 
-def _filter(objs, attr='meta', **kwargs):
+def _filter(objs2, attr='meta', **kwargs):
     """
     Filter objects, used by several objects in sugar.core
 
@@ -84,6 +84,7 @@ def _filter(objs, attr='meta', **kwargs):
            'min': operator.ge,
            'in': lambda a, b: a in b}
     allowed_funcs = {'len': len}
+    objs = objs2
     getv = lambda obj, key: (allowed_funcs[key](obj) if key in allowed_funcs else
                              getattr(obj, key, None) if attr is None else
                              getattr(getattr(obj, attr), key, None))
diff --git a/sugar/core/fts.py b/sugar/core/fts.py
index 7b12931..9e4f3ca 100644
--- a/sugar/core/fts.py
+++ b/sugar/core/fts.py
@@ -313,7 +313,7 @@ def name(self, value):
 
     def __repr__(self):
         """Represent Feature as a string for debugging."""
-        return f"Feature('{self.type}', [{', '.join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})"
+        return f'Feature("{self.type}", [{", ".join([loc.__repr__() for loc in self.locs])}], meta={self.meta!r})'
 
     @property
     def loc_range(self):
diff --git a/sugar/core/seq.py b/sugar/core/seq.py
index e5bf780..68dcdf3 100644
--- a/sugar/core/seq.py
+++ b/sugar/core/seq.py
@@ -41,11 +41,6 @@ class _BioSeqStr():
     def __init__(self, parent):
         self.__parent = parent
 
-    def __deepcopy__(self, orig):
-        # TODO test
-        return self
-
-
     def center(self, width, *args):
         self.__parent.data = self.__parent.data.center(width, *args)
         return self.__parent
@@ -255,16 +250,6 @@ class BioSeq():
     """
 
     def __init__(self, data, id='', meta=None, type=None):
-        #: Namespace holding all available string methods,
-        #: see `_BioSeqStr` for available methods
-        #: and `python:str` for documentation of the methods
-        #:
-        #: .. rubric:: Example:
-        #:
-        #: >>> seq = read()[0]
-        #: >>> seq.str.find('ATG')  # Use string method
-        #: 30
-        self.str = _BioSeqStr(self)
         #: Property holding the data string
         self.data = str(data).upper()
         if hasattr(data, 'meta'):
@@ -299,7 +284,7 @@ def __str__(self):
 
     def __repr__(self):
         metastr = ', '.join(f'{prop}={repr(val)}' for prop, val in vars(self.meta).items())
-        return f'{type(self).__name__}([{repr(self.data)}, meta=dict({metastr}))'
+        return f'{type(self).__name__}({repr(self.data)}, meta=dict({metastr}))'
 
     def __eq__(self, string):
         if isinstance(string, BioSeq):
@@ -349,6 +334,21 @@ def __iadd__(self, other):
     def __radd__(self, other):
         return self.__class__(str(other) + self.data, meta=self.meta)
 
+    @property
+    def str(self):
+        """
+        Namespace holding all available string methods.
+        A new `_BioSeqStr` wrapper bound to this sequence is created on each access,
+        see `_BioSeqStr` for available methods and `python:str` for their documentation.
+
+        .. rubric:: Example:
+
+        >>> seq = read()[0]
+        >>> seq.str.find('ATG')  # Use string method
+        30
+        """
+        return _BioSeqStr(self)
+
     @property
     def id(self):
         """Alias for ``BioSeq.meta.id``"""
@@ -383,7 +383,7 @@ def add_fts(self, fts):
 
         :param fts: features to add
         """
-        self.fts = self.fts + fts
+        self.fts = self.fts + FeatureList(fts)
         self.fts.sort()
 
     @property
@@ -732,20 +732,6 @@ class BioBasket(collections.UserList):
     attribute.
     """
     def __init__(self, data=None, meta=None):
-        # Documentation for str attribute:
-        #: Namespace holding all available string methods,
-        #:
-        #: The `BioBasket.str` methods call the corresponding `BioSeq.str` methods under the hood
-        #: and return either the altered `BioBasket` object or a list with results.
-        #: See `_BioSeqStr` for available methods
-        #: and `python:str` for documentation of the methods
-        #:
-        #: .. rubric:: Example:
-        #:
-        #: >>> seqs = read()
-        #: >>> seqs.str.find('ATG')  # Use string method
-        #: [30, 12]
-        self.str = _BioBasketStr(self)
         if data is None:
             data = []
         if hasattr(data, 'meta'):
@@ -766,6 +752,24 @@ def __eq__(self, other):
             return self.data == other.data and self.meta == other.meta
         return self.data == other
 
+    @property
+    def str(self):
+        """
+        Namespace holding all available string methods.
+
+        The `BioBasket.str` methods call the corresponding `BioSeq.str` methods under the hood
+        and return either the altered `BioBasket` object or a list with results.
+        See `_BioSeqStr` for available methods and `python:str` for their documentation.
+        A new `_BioBasketStr(self)` wrapper is created on each access.
+
+        .. rubric:: Example:
+
+        >>> seqs = read()
+        >>> seqs.str.find('ATG')  # Use string method
+        [30, 12]
+        """
+        return _BioBasketStr(self)
+
     @property
     def ids(self):
         """List of sequence ids"""
@@ -805,7 +809,7 @@ def add_fts(self, fts):
         for seq in self:
             if seq.id in fts:
                 seq.fts = seq.fts + fts.pop(seq.id)
-                seq.meta.fts.sort()
+                seq.fts.sort()
         if len(fts) > 0:
             missing_ids = ', '.join(fts.keys())
             warn(f'Features for seqids {missing_ids} could not be '
diff --git a/sugar/data/__init__.py b/sugar/data/__init__.py
index 7872987..f307ddc 100644
--- a/sugar/data/__init__.py
+++ b/sugar/data/__init__.py
@@ -65,8 +65,9 @@ def _submat_files():
 @lru_cache
 def submat(fname):
     """
-    Return substition matrix as a dict of dicts
+    Return substitution matrix as a dict of dicts
 
+    >>> from sugar.data import submat
     >>> bl = submat('blosum62')
     >>> bl['A']['A']
     4
@@ -123,6 +124,7 @@ def gcode(tt=1):
 
     :param tt: number of the translation table (default: 1)
 
+    >>> from sugar.data import gcode
     >>> gc = gcode()
     >>> gc.tt['TAG']
     '*'
diff --git a/sugar/tests/test_core_cane.py b/sugar/tests/test_core_cane.py
index 71076ff..78eed5c 100644
--- a/sugar/tests/test_core_cane.py
+++ b/sugar/tests/test_core_cane.py
@@ -1,7 +1,7 @@
 # (C) 2024, Tom Eulenfeld, MIT license
 import pytest
 
-from sugar import read, read_fts
+from sugar import read, read_fts, BioSeq, BioBasket
 from  sugar.core.cane import translate
 
 
@@ -38,6 +38,45 @@ def test_translate_final_stop():
 
 # TODO more translation tests
 
+def test_match():
+    seq = BioSeq('NNNUAGDDDUAGAUG')
+    seqs = BioBasket([seq])
+    seq2 = BioSeq('-UU-U-AG')  # gapped sequence, used for the gap handling checks
+    assert seq.match('stop').start() == 3
+    assert seq.match('start').end() == len(seq)
+    matches = seq.matchall('stop')
+    assert matches[0].span() == seq.match('stop').span()
+    assert len(matches) == 2
+    assert seqs.match('stop')[0].start() == 3
+    assert seqs.matchall('stop')[0].start() == 3
+    matches = seq2.matchall('stop', gap=None)
+    assert len(matches) == 0  # with gap=None the gapped stop codon is not found
+    match = seq2.match('stop', gap='-')
+    assert match.group() == 'U-AG'
+    assert seq2.match('stop', gap='-', rf=1) is None
+    assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG'
+    assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG'
+    assert seq2.match('stop', gap='-', rf=(0, 1)) is None
+    seq3 = seq2.copy().rc()
+    match3 = seq3.match('stop', gap='-', rf='bwd')
+    assert match.span() == match3._match.span()
+    assert match.span() != match3.span()
+
+
+def test_orf():
+    seqs = read()
+    orfs = seqs[0].find_orfs()
+    assert len(orfs) > 0
+    longest_orf = orfs.sort(len)[-1]  # the longest ORF has to be the annotated CDS
+    assert seqs[0][longest_orf] == seqs[0]['cds']
+
+    orfs2 = seqs[0].find_orfs(rf='both')
+    assert len(orfs2) > len(orfs)
+
+    orfs = seqs.find_orfs()
+    for id_ in seqs.ids:
+        assert seqs.d[id_][orfs.d[id_].sort(len)[-1]] == seqs.d[id_]['cds']
+
 
 def test_filter_fts():
     fts = read_fts()
@@ -54,6 +93,11 @@ def test_filter_seqs():
     seqs = read()
     seqs.filter(len_gt=9500)
     assert len(seqs) == 1
+    seqs = read()
+    seqs2 = seqs.filter(len_gt=9500, inplace=False)
+    assert len(seqs2) == 1
+    assert len(seqs2) < len(seqs)
+
 
 
 def test_groupby_fts_nested():
diff --git a/sugar/tests/test_core_seq.py b/sugar/tests/test_core_seq.py
index fa3cab2..8c099ac 100644
--- a/sugar/tests/test_core_seq.py
+++ b/sugar/tests/test_core_seq.py
@@ -5,10 +5,17 @@
 from sugar.tests.util import tempfilename
 
 
+def test_siformat():
+    from sugar.core.seq import _si_format
+    assert _si_format(10000) == '10k'
+    assert _si_format(0) == '0'
+
+
 def test_attr():
     assert Attr(a=1) == Attr(a=1)
     assert Attr(a=1) != Attr(a=2)
 
+
 def test_bioseq_equal():
     s1 = BioSeq('bla', id='5')
     s2 = BioSeq('bla', id='5')
@@ -73,6 +80,11 @@ def test_copy():
     assert seq.copy()[10:] != seq
     assert len(seq.copy()[10:]) == n - 10
     assert seq.copy() == seq
+    seqs = read()
+    seqs2 = seqs.copy()
+    assert seqs2 == seqs
+    seqs2[0].data = 'NNN'
+    assert seqs2 != seqs
 
 
 def test_countall():
@@ -102,10 +114,22 @@ def test_meta_str():
     assert 'CDS' in str(meta)
 
 
+def test_biobasket_str():
+    seqs = read()
+    seqs2 = seqs.copy()
+    seqs2.data = []
+    assert str(seqs2).startswith('0 seq')
+    seqs2 = seqs.copy()
+    seqs2.data = 10 * seqs2.data  # enlarge the basket that is actually printed below
+    assert '...' in str(seqs2)
+
+
 def test_shortcuts():
     seq = read()[0]
     assert seq.id == seq.meta.id
     assert seq.fts == seq.meta.fts
+    seq.id = 'XXX'
+    assert seq.id == seq.meta.id
 
 
 def test_getitem():
@@ -140,6 +164,13 @@ def test_getitem():
     # assert seqs[0][3:6].meta.features[0].orig_len == 4
     # assert len(seqs[0][10:20].meta.features) == 0
 
+    # TODO: test feature handling on slicing (see the commented-out asserts above)
+
+
+def test_sl_slicable_inplace():
+    seqs = read()
+    assert seqs.sl()[:1] == seqs[:1]
+
 
 def test_setitem():
     seqs = read()
@@ -150,6 +181,9 @@ def test_setitem():
     seqs[0] = 'ABC'
     assert isinstance(seqs[0], BioSeq)
     assert seqs[0] == 'ABC'
+    seqs = read()
+    seqs[:2] = ['AGT', 'TGA']
+    assert str(seqs[0]) == 'AGT'
 
 
 def test_add_fts():
@@ -167,41 +201,34 @@ def test_add_fts():
     assert seq.fts[1] == ft
     assert seq.fts[-1] != ft
 
+    ft = seqs.fts[0]
+    ft.seqid = 'unknown'
+    with pytest.warns(UserWarning, match='.*unknown'):
+        seqs.add_fts([ft])
+    with pytest.warns(UserWarning, match='.*unknown'):
+        seqs.fts = [ft]
+    with pytest.warns(UserWarning, match='.*mismatch'):
+        seqs[0].add_fts([ft])
+
+
+def test_biobasket_rc():
+    seqs = read()
+    seqs2 = seqs.copy().rc()
+    assert seqs[0].rc() == seqs2[0]
+
+
+def test_repr():
+    from sugar import Location, Meta  # presumably needed by the eval() calls below -- confirm
+    seqs = read()
+    assert eval(repr(seqs[0])) == seqs[0]
+    assert eval(repr(seqs)) == seqs
+
+
+def test_magic_methods():
+    # TODO
+    pass
+
 
-def test_match():
-    seq = BioSeq('NNNUAGDDDUAGAUG')
-    seqs = BioBasket([seq])
-    seq2 = BioSeq('-UU-U-AG')
-    assert seq.match('stop').start() == 3
-    assert seq.match('start').end() == len(seq)
-    matches = seq.matchall('stop')
-    assert matches[0].span() == seq.match('stop').span()
-    assert len(matches) == 2
-    assert seqs.match('stop')[0].start() == 3
-    matches = seq2.matchall('stop', gap=None)
-    assert len(matches) == 0
-    match = seq2.match('stop', gap='-')
-    assert match.group() == 'U-AG'
-    assert seq2.match('stop', gap='-', rf=1) == None
-    assert seq2.match('stop', gap='-', rf=2).group() == 'U-AG'
-    assert seq2.match('stop', gap='-', rf=(1, 2)).group() == 'U-AG'
-    assert seq2.match('stop', gap='-', rf=(0, 1)) == None
-    seq3 = seq2.copy().rc()
-    match3 = seq3.match('stop', gap='-', rf='bwd')
-    assert match.span() == match3._match.span()
-    assert match.span() != match3.span()
-
-
-def test_orf():
-    seqs=read()
-    orfs = seqs[0].find_orfs()
-    assert len(orfs) > 0
-    longest_orf = orfs.sort(len)[-1]
-    assert seqs[0][longest_orf] == seqs[0]['cds']
-
-    orfs2 = seqs[0].find_orfs(rf='both')
-    assert len(orfs2) > len(orfs)
-
-    orfs = seqs.find_orfs()
-    for id_ in seqs.ids:
-        assert seqs.d[id_][orfs.d[id_].sort(len)[-1]] == seqs.d[id_]['cds']
+def test_str_methods():
+    # TODO
+    pass
diff --git a/sugar/tests/test_docs.py b/sugar/tests/test_docs.py
index 71a52da..2061156 100644
--- a/sugar/tests/test_docs.py
+++ b/sugar/tests/test_docs.py
@@ -2,20 +2,22 @@
 
 from contextlib import redirect_stdout
 import io
+from sugar import read, read_fts
 
 
 def doctest_module(m):
     from doctest import testmod, ELLIPSIS
     raised = False
     flags = ELLIPSIS
+    globs = {'read': read, 'read_fts': read_fts}
     try:
-        testmod(m, raise_on_error=True, optionflags=flags)
+        testmod(m, raise_on_error=True, optionflags=flags, extraglobs=globs)
     except Exception:
         raised = True
     if raised:
         report = io.StringIO()
         with redirect_stdout(report):
-            testmod(m, optionflags=flags, report=True)
+            testmod(m, optionflags=flags, report=True, extraglobs=globs)
         assert report.getvalue() == ''
 
 
diff --git a/sugar/tests/test_io.py b/sugar/tests/test_io.py
index 11889a4..101e0f6 100644
--- a/sugar/tests/test_io.py
+++ b/sugar/tests/test_io.py
@@ -69,6 +69,18 @@ def test_io_file():
                 assert str(seq2) == str(seq1)
 
 
+def test_write_fmtstr_seq():
+    seqs = read()
+    with tempfile.NamedTemporaryFile() as f:
+        seqs[0].write(f.name, 'fasta')
+        f.seek(0)
+        seqs2 = read(f.name)
+        assert str(seqs2[0]) == str(seqs[0])
+    s = seqs[0].tofmtstr('fasta')
+    seqs2 = seqs.fromfmtstr(s)
+    assert str(seqs2[0]) == str(seqs[0])
+
+
 def test_io_fmtstr():
     seqs = read()
     for fmt in TESTIOFMTS: