Skip to content
This repository has been archived by the owner on Jan 13, 2022. It is now read-only.

Commit

Permalink
Merge branch 'DaveMendMaster' into 'master'
Browse files Browse the repository at this point in the history
Dave mend master

See merge request algorithm/taiyaki!380
  • Loading branch information
tmassingham-ont committed Nov 12, 2020
2 parents cba4b4a + 712e46b commit d04b340
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 83 deletions.
6 changes: 3 additions & 3 deletions misc/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,11 @@ def call_bwa_mem(fin, fout, genome, clargs=''):
genome (str): path to reference to align against
clargs (str): optional cmd line arguments to pass to bwa as a string
Returns:
Returns:
str: stdout of bwa command
Raises:
:subprocess:`CalledProcessError`: subprocess error message from bwa call
Raises:
:subprocess:`CalledProcessError`: subprocess err. message from bwa call
"""
command_line = "bwa mem {} {} {} > {}".format(clargs, genome, fin, fout)
try:
Expand Down
5 changes: 2 additions & 3 deletions misc/plot_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,15 @@ def get_parser():


def moving_average(a, n=3):
""" Generate moving average.
""" Generate moving average.
Args:
a (:np:`ndarray`) : 1D input array
n (int, optional) : square window length
Returns:
:np:`ndarray` : 1D output array
Note: If length of a is less than n, and for elements earlier than the nth,
Note: If length of a is less than n, and for elements earlier than the nth,
average as many points as available.
"""
x = np.cumsum(a, dtype=float)
Expand Down
38 changes: 19 additions & 19 deletions taiyaki/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@
def flipflop_viterbi(scores, _never_use_cupy=False):
""" Find highest scoring flipflop paths for a batch of score matrices.
Args:
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and S
is the number of distinct flipflop transitions. For 4 bases S = 40,
and in general S = 2 * nbase * (nbase + 1). Note that the input
scores should be on a log scale, i.e. the score of a path is
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and
S is the number of distinct flipflop transitions. For 4 bases
S = 40, and in general S = 2 * nbase * (nbase + 1). Note that the
input scores should be on a log scale, i.e. the score of a path is
determined by summing the scores of the individual transitions.
_never_use_cupy (bool): this method delegates to cupy implementation if
possible, unless _never_use_cupy=True, defaults to False
Returns:
tuple(:torch:`Tensor`, :torch:`Tensor`, :torch:`Tensor`):
tuple(:torch:`Tensor`, :torch:`Tensor`, :torch:`Tensor`):
fwd scores tensor, traceback tensor, flipflop path tensor
"""
use_cupy = all([
Expand All @@ -43,18 +43,18 @@ def flipflop_make_trans(scores, _never_use_cupy=False):
""" Calculates posterior probabilities (not logs!) from raw model output.
Args:
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and S
is the number of distinct flipflop transitions. For 4 bases S = 40,
and in general S = 2 * nbase * (nbase + 1). This should consist of
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and
S is the number of distinct flipflop transitions. For 4 bases
S = 40, and in general S=2*nbase*(nbase+1). This should consist of
globally normalised transition scores for a flipflop CRF.
_never_use_cupy (bool): this method delegates to cupy implementation if
possible, unless _never_use_cupy=True, defaults to False
Returns:
Returns:
:torch:`Tensor`: floats of shape (T x batch size x S) containing
posterior transition probabilities (not logs!) It can be verified
that this is equivalent to the derivative of the log-partition
posterior transition probabilities (not logs!) It can be verified
that this is equivalent to the derivative of the log-partition
function with respect to the raw scores.
"""
use_cupy = all([
Expand All @@ -74,17 +74,17 @@ def flipflop_make_trans(scores, _never_use_cupy=False):

@torch.no_grad()
def _flipflop_viterbi(scores):
""" Find highest scoring flipflop paths for a batch of score matrices. This
""" Find highest scoring flipflop paths for a batch of score matrices. This
is an idiomatic pytorch implementation.
Args:
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and S
is the number of distinct flipflop transitions. For 4 bases S = 40,
and in general S = 2 * nbase * (nbase + 1).
scores (:torch:`Tensor`): batch of score matrices with dimensions
[T, batch size, S] where T is the number of blocks (time axis) and
S is the number of distinct flipflop transitions. For 4 bases
S = 40, and in general S = 2 * nbase * (nbase + 1).
Returns:
tuple(:torch:`Tensor`, :torch:`Tensor`, :torch:`Tensor`):
tuple(:torch:`Tensor`, :torch:`Tensor`, :torch:`Tensor`):
fwd scores tensor, traceback tensor, flipflop path tensor
"""
T, N, S = scores.shape
Expand Down
42 changes: 21 additions & 21 deletions taiyaki/fast5utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ def iterate_file_read_pairs(filepaths, read_ids, limit=None, verbose=0):


def iterate_files_reads_unpaired(filepaths, read_ids, limit=None, verbose=0):
""" Iterate over unpaired lists of filepaths and read_ids, looking in all
the files given and returning only those read_ids in the read_ids list. If
""" Iterate over unpaired lists of filepaths and read_ids, looking in all
the files given and returning only those read_ids in the read_ids list. If
read_ids is None then get all the reads in the files.
Args:
filepaths (list of str): list of filepaths
read_ids (list of str or None): list of read ids or None for all reads
read_ids (list of str or None): list of read ids or None for all reads
in each file
limit (int, optional): Maximum number of tuples to produce
verbose (int, optional): Output level of debug verbosity
Expand Down Expand Up @@ -90,40 +90,40 @@ def iterate_files_reads_unpaired(filepaths, read_ids, limit=None, verbose=0):

def iterate_fast5_reads(
path, strand_list=None, limit=None, verbose=0, recursive=False):
""" Iterate over reads in a directory of fast5 files or a single fast5 file.
Files may be single or multi-read fast5s.
""" Iterate over reads in a directory of fast5 files or a single fast5
file. Files may be single or multi-read fast5s.
Args:
path (str): Directory (or filename for a single file)
strand_list (str or None, optional): Path to file containing list of
files and/or read ids to iterate over (as described in notes) or
strand_list (str or None, optional): Path to file containing list of
files and/or read ids to iterate over (as described in notes) or
None for all files and reads
limit (int or None, optional): Maximum number of reads to consider or
limit (int or None, optional): Maximum number of reads to consider or
None for all
verbose (int, optional): 0 prints no messages, 1 prints a message for
verbose (int, optional): 0 prints no messages, 1 prints a message for
every file read, 2 prints the list of files before starting as well
recursive (bool, optional): Search path recursively for fast5 files
Yields:
(tuple(str, str)): filepath and read_id for each read. You may say, "why
not yield an ont_fast_api object instead of a nasty tuple?" I would
say: "yes, I did try that, but it led to unfathomable nastiness when
I fed these objects in as arguments to multiple processes."
(tuple(str, str)): filepath and read_id for each read. You may say,
"why not yield an ont_fast_api object instead of a nasty tuple?" I
would say: "yes, I tried that, but it led to unfathomable nastiness
when I fed these objects in as arguments to multiple processes."
Notes:
If strand_list is given, then only return the reads specified, according
If strand_list is given, then only return the reads specified, according
to the following rules:
(A) If the strand list file has a column 'read_id' and no column
'filename' or 'filename_fast5' then look through all fast5 files in
'filename' or 'filename_fast5' then look through all fast5 files in
the path and return all reads with read_ids in that column.
(B) If the strand list file has a column 'filename' or 'filename_fast5'
and no column 'read_id' then look through all filenames specified
and no column 'read_id' then look through all filenames specified
and return all reads in them.
(C) If the strand list has a column 'filename' or 'filename_fast5' _and_
a column 'read_id' then loop through the rows in the strand list,
returning the appropriate tuple for each row. We check that each
file exists and contains the read_id.
(C) If the strand list has a column 'filename' or 'filename_fast5'
_and_ a column 'read_id' then loop through the rows in the strand
list, returning the appropriate tuple for each row. We check that
each file exists and contains the read_id.
Example:
read_iterator = iterate_fast5_reads('directory')
Expand Down Expand Up @@ -212,7 +212,7 @@ def get_filename(read):


def get_channel_info(read):
"""Get channel info from read object.
"""Get channel info from read object.
Args:
read (:ont_fast5_api:`Fast5Read`): the read object
Expand Down
64 changes: 32 additions & 32 deletions taiyaki/qscores.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ def qchar_from_qscore(score, zerochar=33):
"""Return ASCII character(s) encoding q score from score.
Args:
score (float or list or :np:`ndarray`) : float or list or 1D input array
of error prob = 10^(-score/10)
zerochar (int, optional) : ASCII code character encoding probability 1
score (float or list or :np:`ndarray`) : float or list or 1D input
array of error prob = 10^(-score/10)
zerochar (int, optional) : ASCII code character encoding probability 1
(score 0)
Returns:
Expand All @@ -33,8 +33,8 @@ def qscore_from_errprob(errprob):
Args:
errprob (scalar or :np:`ndarray`): probability of error
Returns:
scalar or :np:`ndarray`: -10 log_10(errprob)
Returns:
scalar or :np:`ndarray`: -10 log_10(errprob)
"""
return -10.0 * np.log10(errprob)

Expand All @@ -44,7 +44,7 @@ def qchar_from_errprob(errprob, qscore_scale, qscore_offset):
Args:
errprob (scalar or :np:`ndarray`) : probability of error
qscore_scale (scalar): qscore <-- qscore * qscore_scale + qscore_offset,
qscore_scale (scalar): qscore <-- qscore*qscore_scale + qscore_offset,
before encoding it as a character
qscore_offset (scalar): see qscore_scale above
Expand All @@ -56,7 +56,7 @@ def qchar_from_errprob(errprob, qscore_scale, qscore_offset):


def transitions_into_base(b, nbases, device):
"""Return all transition-matrix indices for all transitions into base
"""Return all transition-matrix indices for all transitions into base
(flip or flop).
Args:
Expand All @@ -66,11 +66,11 @@ def transitions_into_base(b, nbases, device):
E.g. 'cpu', 1, 'cuda:1'
Returns:
:torch:`Tensor` : 1D tensor of longs representing indices
:torch:`Tensor` : 1D tensor of longs representing indices
(in range 0 to 39) for ACGT.
Note:
All transitions, including those where no base is emitted, are included.
All transitions, inc. those where no base is emitted, are included.
"""
# Transition A to b_flip
colstart = nbases * 2 * b
Expand All @@ -97,22 +97,22 @@ def errprobs_from_trans(trans, path):
errorprob = 1-p
Args:
trans (:torch:`Tensor`): Tensor of floats with shape
(nblocks x batchsize x nstates) where nstates = 40 for 4-base models
containing posterior transition weights (not logs!)
path (:torch:`Tensor`): Tensor of longs with shape
((nblocks+1) x batchsize) containing flip-flop states (integers 0-7
for 4-base models). The transition that goes with trans[n,bn,:] is
trans (:torch:`Tensor`): Tensor of floats with shape
(nblocks x batchsize x nstates) where nstates = 40 for 4-base
models containing posterior transition weights (not logs!)
path (:torch:`Tensor`): Tensor of longs with shape
((nblocks+1) x batchsize) containing flip-flop states (integers 0-7
for 4-base models). The transition that goes with trans[n,bn,:] is
the one from path[n,bn] to path[n+1,bn].
Returns:
:torch:`Tensor` : errorprob = tensor of floats with shape
:torch:`Tensor` : errorprob = tensor of floats with shape
((nblocks+1) x batchsize) containing errorprob for each element of
the path, and -1.0 in row 0. Note that this doesn't matter since
these probabilities are removed later on in the pipeline. The output
matrix must be the same shape as the path in order to be fed into
the stitching function.
the path, and -1.0 in row 0. Note that this doesn't matter since
these probabilities are removed later on in the pipeline. The
output matrix must be the same shape as the path in order to be fed
into the stitching function.
"""
nblocks, batchsize, flip_flop_transitions = trans.shape
nbases = flipflopfings.nbase_flipflop(flip_flop_transitions)
Expand Down Expand Up @@ -146,23 +146,23 @@ def path_errprobs_to_qstring(errprobs, path, qscore_scale, qscore_offset):
"""Make qscore string from error probs, ignoring stays.
Args:
errprobs (:torch:`Tensor` or :np:`ndarray`): 1D tensor of floats or 1D
errprobs (:torch:`Tensor` or :np:`ndarray`): 1D tensor of floats or 1D
input array containing error probabilities for each element of path
path (:torch:`Tensor` or :np:`ndarray`): 1D tensor of longs or 1D input
array of ints containing flip-flop states for each block, same
path (:torch:`Tensor` or :np:`ndarray`): 1D tensor of longs or 1D input
array of ints containing flip-flop states for each block, same
length as errprobs
qscore_scale (scalar): qscore <-- qscore * qscore_scale + qscore_offset,
qscore_scale (scalar): qscore <-- qscore*qscore_scale + qscore_offset,
before encoding it as a character
qscore_offset (scalar): see qscore_scale above
Returns:
str : representing quality scores encoded as ASCII characters
Returns:
str : representing quality scores encoded as ASCII characters
Note:
Elements of the path where no base is emitted are not included in the
qstring, and the source base for the first transition is also not
included. So the qstring is the same length as the basecall (provided we
don't include the source base for the first transition in the basecall)
Note:
Elements of the path where no base is emitted are not included in the
qstring, and the source base for the first transition is also not
included. So the qstring is the same length as the basecall (provided
we don't inc. the source base for the first transition in the basecall)
"""
filtered_probs = errprobs[1:][path[1:] != path[:-1]]
if type(filtered_probs) == torch.Tensor:
Expand Down
4 changes: 2 additions & 2 deletions test/acceptance/test_merge_mappedsignalfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def count_reads(self, mapped_signal_file, print_readlist=True):
return len(read_ids)

def test_merge(self):
"""Test that merging two 'normal' mapped signal files produces the
"""Test that merging two 'normal' mapped signal files produces the
output expected."""
test_work_dir = self.work_dir("test_merge")
merged_mapped_signal_file = os.path.join(
Expand Down Expand Up @@ -95,7 +95,7 @@ def test_merge(self):
self.assertTrue(numreads_in > 2)

def test_merge_batch(self):
"""Test that merging two 'batch' mapped signal files produces the
"""Test that merging two 'batch' mapped signal files produces the
output expected."""
test_work_dir = self.work_dir("test_merge_batch")
merged_mapped_signal_file = os.path.join(
Expand Down
5 changes: 3 additions & 2 deletions test/unit/test_cmdargs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def test_nonnegative_valid_float_values(self):
self.assertAlmostEqual(x, f(x))

def test_nonnegative_invalid_float_values(self):
"""Test that invalid floats don't get through non-negative arg check."""
"""Test that invalid floats don't get through non-negative arg
check."""
f = cmdargs.NonNegative(float)
for x in [-1.0, -self.EPS, -1e-5]:
with self.assertRaises(argparse.ArgumentTypeError):
Expand All @@ -71,7 +72,7 @@ def test_proportion_valid_float_values(self):
self.assertAlmostEqual(x, f(x))

def test_proportion_invalid_float_values(self):
"""Test that invalid floats don't get through proportion (0-1) arg
"""Test that invalid floats don't get through proportion (0-1) arg
check."""
f = cmdargs.proportion
for x in [-1e-30, -self.EPS, -1e-5, 1.0 + 1e-5, 1.0 + self.EPS]:
Expand Down
2 changes: 1 addition & 1 deletion test/unit/test_maths.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def setUpClass(self):
np.random.seed(0xdeadbeef)

def test_004_med_mad(self):
"""Test to see if med_mad works without setting axis (so flattening)."""
"""Test to see if med_mad works with axis not set (so flattening)."""
x = np.array(
[[0.5, 0.5, 0.5, 0.5], [0.5, 0.5, 1.0, 1.0], [0.0, 0.5, 0.5, 1.0]])
factor = 1
Expand Down

0 comments on commit d04b340

Please sign in to comment.