Skip to content

Commit

Permalink
init translate changes
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed Jun 10, 2021
1 parent 6b5806c commit d85cbfc
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/core/src/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ pub fn to_aa(seq: &[u8], dayhoff: bool, hp: bool) -> Result<Vec<u8>, Error> {
let mut converted: Vec<u8> = Vec::with_capacity(seq.len() / 3);

for chunk in seq.chunks(3) {
if chunk.len() < 3 {
if chunk.len() < 2 {
break;
}

Expand Down
25 changes: 17 additions & 8 deletions tests/test_minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,22 @@ def test_dayhoff(track_abundance):
mh_dayhoff = MinHash(10, 2, is_protein=True,
dayhoff=True, hp=False, track_abundance=track_abundance)
mh_dayhoff.add_sequence('ACTGAC')

assert len(mh_dayhoff.hashes) == 2
# forward:
# - ACT GAC (prot: TD; dayhoff: bc; hp: pp)
# - CTG ACN (prot: LT; dayhoff: eb; hp: hp)
# - TGA C (prot: *; the trailing 1-nt chunk is dropped, so this frame yields no 2-mer)
# reverse complement (GTCAGT):
# - GTC AGT (prot: VS; dayhoff: eb; hp: hp)
# - TCA GTN (prot: SV; dayhoff: be; hp: ph)
# - CAG T (prot: Q; the trailing 1-nt chunk is dropped, so this frame yields no 2-mer)
#

assert len(mh_dayhoff.hashes) == 3
# verify that dayhoff-encoded hashes are different from protein/aa hashes
mh_protein = MinHash(10, 2, is_protein=True, track_abundance=track_abundance)
mh_protein.add_sequence('ACTGAC')

assert len(mh_protein.hashes) == 2
assert len(mh_protein.hashes) == 4
print(mh_protein.hashes)
print(mh_dayhoff.hashes)
assert mh_protein.hashes != mh_dayhoff.hashes
Expand All @@ -239,12 +248,12 @@ def test_hp(track_abundance):

mh_hp.add_sequence('ACTGAC')

assert len(mh_hp.hashes) == 2
assert len(mh_hp.hashes) == 3
# verify that hp-encoded hashes are different from protein/aa hashes
mh_protein = MinHash(10, 2, is_protein=True, track_abundance=track_abundance)
mh_protein.add_sequence('ACTGAC')

assert len(mh_protein.hashes) == 2
assert len(mh_protein.hashes) == 4
assert mh_protein.hashes != mh_hp.hashes


Expand Down Expand Up @@ -1180,7 +1189,7 @@ def test_set_abundance_clear_3():

a.add_hash(10)
assert a.hashes == {10: 1}

a.set_abundances({20: 1, 30: 4}, clear=False)
assert a.hashes == {10: 1, 20: 1, 30: 4}

Expand Down Expand Up @@ -1209,8 +1218,8 @@ def test_clear_abundance_on_zero():
mh.set_abundances({ 2: -1 }) # Test on clear = True

with pytest.raises(ValueError):
mh.set_abundances({ 2: -1 }, clear=False)
mh.set_abundances({ 2: -1 }, clear=False)

assert len(mh) == 2 # Assert that nothing was affected

def test_reset_abundance_initialized():
Expand Down

0 comments on commit d85cbfc

Please sign in to comment.