Skip to content

Commit

Permalink
Merge pull request #87 from MaozGelbart/motifpssm_fixes
Browse files Browse the repository at this point in the history
FIX: MotifPssmPattern load from file now loads correctly
  • Loading branch information
veghp authored Oct 14, 2024
2 parents f200677 + 70eea9d commit aef8733
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 3 deletions.
2 changes: 1 addition & 1 deletion dnachisel/SequencePattern/MotifPssmPattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def list_from_file(
sequence(s) with the absolute highest possible score".
"""
if isinstance(motifs_file, str):
with open("./jaspar.txt", "r") as f:
with open(motifs_file, "r") as f:
motifs_list = motifs.parse(f, file_format)
else:
motifs_list = motifs.parse(motifs_file, file_format)
Expand Down
33 changes: 33 additions & 0 deletions tests/data/multiple_motifs.meme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
MEME version 4

ALPHABET= ACGT

strands: + -

Background letter frequencies
A 0.25 C 0.25 G 0.25 T 0.25

MOTIF MA0016.1 MA0016.1.usp
letter-probability matrix: alength= 4 w= 10 nsites= 38 E= 0
0.000000 0.026316 0.973684 0.000000
0.026316 0.000000 0.947368 0.026316
0.000000 0.000000 1.000000 0.000000
0.000000 0.000000 1.000000 0.000000
0.000000 0.000000 0.000000 1.000000
0.000000 0.947368 0.026316 0.026316
0.921053 0.000000 0.078947 0.000000
0.131579 0.657895 0.078947 0.131579
0.131579 0.210526 0.578947 0.078947
0.157895 0.263158 0.421053 0.157895
URL http://jaspar.genereg.net/matrix/MA0016.1

MOTIF MA0011.2 MA0011.2.br
letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0
0.000000 0.833333 0.000000 0.166667
0.000000 0.083333 0.000000 0.916667
1.000000 0.000000 0.000000 0.000000
0.083333 0.083333 0.166667 0.666667
0.166667 0.000000 0.083333 0.750000
0.083333 0.166667 0.083333 0.666667
URL http://jaspar.genereg.net/matrix/MA0011.2

19 changes: 19 additions & 0 deletions tests/data/single_motif.meme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
MEME version 4

ALPHABET= ACGT

strands: + -

Background letter frequencies
A 0.25 C 0.25 G 0.25 T 0.25

MOTIF MA0011.2 MA0011.2.br
letter-probability matrix: alength= 4 w= 6 nsites= 12 E= 0
0.000000 0.833333 0.000000 0.166667
0.000000 0.083333 0.000000 0.916667
1.000000 0.000000 0.000000 0.000000
0.083333 0.083333 0.166667 0.666667
0.166667 0.000000 0.083333 0.750000
0.083333 0.166667 0.083333 0.666667
URL http://jaspar.genereg.net/matrix/MA0011.2

34 changes: 32 additions & 2 deletions tests/test_patterns.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
from dnachisel.SequencePattern import SequencePattern
import pytest
from pathlib import Path

from dnachisel import SequencePattern, MotifPssmPattern


@pytest.fixture
def test_single_motif_filepath():
    """Return the path (as ``str``) of the single-motif MEME data file.

    The path is built relative to this test module's directory, so the
    fixture does not depend on the current working directory.
    """
    return str(Path(__file__).parent / 'data' / 'single_motif.meme.txt')


@pytest.fixture
def test_multiple_motif_filepath():
    """Return the path (as ``str``) of the multiple-motifs MEME data file.

    The path is built relative to this test module's directory, so the
    fixture does not depend on the current working directory.
    """
    return str(Path(__file__).parent / 'data' / 'multiple_motifs.meme.txt')


def test_patterns_from_string():
    """Check the expressions built by ``SequencePattern.from_string``.

    Covers three input strings: a repeated nucleotide ("6xT"), a named
    site ("BsmBI_site") and a repeated k-mer ("5x2mer").
    """
    pattern = SequencePattern.from_string("6xT")
    assert pattern.expression == "TTTTTT"
    pattern = SequencePattern.from_string("BsmBI_site")
    assert pattern.expression == "CGTCTC"
    pattern = SequencePattern.from_string("5x2mer")
    # The same assertion appeared twice in a row; one check is sufficient.
    assert pattern.expression == '([ATGC]{2})\\1{4}'


def test_pssm_pattern_from_file(
    test_single_motif_filepath, test_multiple_motif_filepath
):
    """Check that ``MotifPssmPattern.list_from_file`` reads the given file.

    Both data files are parsed with the "minimal" file format and a
    relative threshold of 0.9. The single-motif file must yield exactly
    one pattern and the multiple-motifs file exactly two, each of them a
    ``MotifPssmPattern`` instance.
    """
    single_pattern = MotifPssmPattern.list_from_file(
        test_single_motif_filepath, "minimal", relative_threshold=0.9
    )
    assert len(single_pattern) == 1
    assert all(isinstance(p, MotifPssmPattern) for p in single_pattern)

    multiple_patterns = MotifPssmPattern.list_from_file(
        test_multiple_motif_filepath, "minimal", relative_threshold=0.9
    )
    assert len(multiple_patterns) == 2
    assert all(isinstance(p, MotifPssmPattern) for p in multiple_patterns)

0 comments on commit aef8733

Please sign in to comment.