diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 83b46a7..7d44590 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,21 @@ Changelog ========= + +0.3.0 +----- + +* Functions ``from_dict`` renamed ``from_sequence_dict``, and ``to_dict`` + renamed ``to_sequence_dict``. +* Added tests for the functions ``from_sequence_dict`` and ``to_sequence_dict``. +* Installation now requires pandas >= 0.21. + +Release date: 2019-05-19 + +`View commits `_ + + + 0.2.0 ----- diff --git a/README.rst b/README.rst index c8be93e..6076110 100644 --- a/README.rst +++ b/README.rst @@ -12,7 +12,7 @@ Convert between the following objects: * BioPython MultipleSeqAlignment <-> pandas DataFrame * DendroPy CharacterMatrix <-> pandas DataFrame -* Python dictionary <-> pandas DataFrame +* "Sequence dictionary" <-> pandas DataFrame The code has been tested with Python 2.7, 3.5 and 3.6. @@ -214,8 +214,8 @@ pandas DataFrame to BioPython MultipleSeqAlignment TG-AA t3 -Python dictionary to pandas DataFrame -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +"Sequence dictionary" to pandas DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: pycon @@ -226,7 +226,7 @@ Python dictionary to pandas DataFrame ... 't2': 'TGCAA', ... 't3': 'TG-AA' ... } - >>> df = pc.from_dict(d) + >>> df = pc.from_sequence_dict(d) >>> df t1 t2 t3 0 T T T @@ -236,8 +236,8 @@ Python dictionary to pandas DataFrame 4 A A A -pandas DataFrame to Python dictionary -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +pandas DataFrame to "sequence dictionary" +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: pycon @@ -247,7 +247,7 @@ pandas DataFrame to Python dictionary ... 't1': ['T', 'C', 'C', 'A', 'A'], ... 't2': ['T', 'G', 'C', 'A', 'A'], ... 't3': ['T', 'G', '-', 'A', 'A']}) - >>> pc.to_dict(df) + >>> pc.to_sequence_dict(df) {'t1': 'TCCAA', 't2': 'TGCAA', 't3': 'TG-AA'} diff --git a/pandascharm.py b/pandascharm.py index c97bd96..01b87f6 100755 --- a/pandascharm.py +++ b/pandascharm.py @@ -6,7 +6,7 @@ __author__ = 'Markus Englund' __license__ = 'MIT' -__version__ = '0.2.0' +__version__ = '0.3.0' def frame_as_categorical(frame, include_categories=None): @@ -83,7 +83,16 @@ def from_charmatrix(charmatrix, categorical=True): return new_frame -def from_dict(d, categorical=True): +def from_sequence_dict(d, categorical=True): + """ + Convert a dict with sequences as strings to a pandas DataFrame. + + Parameters + ---------- + d : dict + categorical : bool (default: True) + If True, the result will be returned as a categorical frame. + """ d_seq_list = {k: list(v) for (k, v) in d.items()} frame = pandas.DataFrame(d_seq_list) if categorical: @@ -161,5 +170,6 @@ def to_charmatrix(frame, data_type): return charmatrix -def to_dict(frame, into=dict): +def to_sequence_dict(frame, into=dict): + """Convert a pandas DataFrame to a dict with sequences as strings.""" return frame.apply(lambda x: ''.join(x)).to_dict(into=into) diff --git a/requirements.txt b/requirements.txt index 6ff9f11..a1873c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pandas>=0.16 +pandas>=0.21 DendroPy>=4.0 BioPython diff --git a/setup.py b/setup.py index 0118b0f..07bfdf8 100755 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setup( name='pandas-charm', - version='0.2.0', + version='0.3.0', description=( 'A small Python library for getting character matrices ' '(alignments) into and out of pandas'), @@ -16,7 +16,7 @@ join(dirname(__file__), 'README.rst'), encoding='utf-8').read(), packages=find_packages(exclude=['docs', 'tests*']), py_modules=['pandascharm'], - install_requires=['pandas>=0.16'], + install_requires=['pandas>=0.21'], extras_require={'testing': [ 'coverage', 'pytest', 'biopython', 'dendropy']}, author='Markus Englund', diff --git a/test_pandascharm.py b/test_pandascharm.py index cf9c9d0..b6db7c4 100755 --- a/test_pandascharm.py +++ b/test_pandascharm.py @@ -27,8 +27,8 @@ to_charmatrix, from_bioalignment, to_bioalignment, - from_dict, - to_dict) + from_sequence_dict, + to_sequence_dict) class TestAsCategorical(): @@ -198,18 +198,28 @@ def test_invalid_alphabet(self): to_bioalignment(self.dna_frame, alphabet='dna') -class TestDictConversion(): +class TestSequenceDictConversion(): dna_frame = pandas.DataFrame({ 't1': ['T', 'C', 'C', 'A', 'A'], 't2': ['T', 'G', 'C', 'A', 'A'], 't3': ['T', 'G', '-', 'A', 'A']}, dtype='object') + dna_frame_nan = pandas.DataFrame({ + 't1': ['T', 'C', 'C', 'A', 'A'], + 't2': ['T', 'G', 'C', 'A', 'A'], + 't3': ['T', 'G', '-', 'A', numpy.nan]}, dtype='object') + dna_dict = {'t1': 'TCCAA', 't2': 'TGCAA', 't3': 'TG-AA'} - def test_from_dict(self): + def test_from_sequence_dict(self): assert_frame_equal( - from_dict(self.dna_dict, categorical=False), self.dna_frame) + from_sequence_dict(self.dna_dict, categorical=False), + self.dna_frame) + + def test_to_sequence_dict(self): + assert(to_sequence_dict(self.dna_frame) == self.dna_dict) - def test_to_dict(self): - assert(to_dict(self.dna_frame) == self.dna_dict) + def test_do_sequence_dict_nan(self): + with pytest.raises(TypeError): + to_sequence_dict(self.dna_frame_nan)