diff --git a/bin/_bin_argparse.py b/bin/_bin_argparse.py index b42f239..0bb36a7 100644 --- a/bin/_bin_argparse.py +++ b/bin/_bin_argparse.py @@ -64,7 +64,7 @@ def get_train_flipflop_parser(): '--warmup_batches', type=int, default=200, help='Over first n batches, increase learning rate like cosine.') trn_grp.add_argument( - '--lr_warmup', metavar='rate', type=Positive(float), + '--lr_warmup', metavar='rate', type=Positive(float), help='Start learning rate for warmup. Defaults to lr_min.') trn_grp.add_argument( '--min_momentum', type=Positive(float), diff --git a/misc/assess_alignment.py b/misc/assess_alignment.py index 136a778..d1c3ef8 100755 --- a/misc/assess_alignment.py +++ b/misc/assess_alignment.py @@ -89,7 +89,7 @@ def samacc(align_fn, min_coverage=0.6): :returns: list of ACC_METRICS namedtuples containing accuracy metrics for each valid alignment. """ - res = [] + res = {} with pysam.AlignmentFile(align_fn, 'r') as sf: for read in sf.fetch(until_eof=True): if read.flag != 0 and read.flag != 16: @@ -111,27 +111,29 @@ def samacc(align_fn, min_coverage=0.6): readlen = bins[0] + bins[1] perr = min(0.75, float(mismatch) / readlen) pmatch = 1.0 - perr + accuracy = float(correct) / alnlen entropy = pmatch * np.log2(pmatch) if mismatch > 0: entropy += perr * np.log2(perr / 3.0) - res.append(ACC_METRICS( - reference=read.reference_name, - query=read.query_name, - strand='-' if read.is_reverse else '+', - reference_start=read.reference_start, - reference_end=read.reference_end, - match=bins[0], - mismatch=mismatch, - insertion=bins[1], - deletion=bins[2], - coverage=coverage, - id=float(correct) / float(bins[0]), - accuracy=float(correct) / alnlen, - information=bins[0] * (2.0 + entropy))) - - return res + if read.query_name not in res or res[read.query_name].accuracy < accuracy: + res[read.query_name] = ACC_METRICS( + reference=read.reference_name, + query=read.query_name, + strand='-' if read.is_reverse else '+', + reference_start=read.reference_start, + 
reference_end=read.reference_end, + match=bins[0], + mismatch=mismatch, + insertion=bins[1], + deletion=bins[2], + coverage=coverage, + id=float(correct) / float(bins[0]), + accuracy=accuracy, + information=bins[0] * (2.0 + entropy)) + + return list(res.values()) def acc_plot(acc, mode, median, title, fill=PLOT_DO_FILL): diff --git a/misc/calibrate_qscores_byread.py b/misc/calibrate_qscores_byread.py index 518b92d..3d263ed 100755 --- a/misc/calibrate_qscores_byread.py +++ b/misc/calibrate_qscores_byread.py @@ -143,8 +143,8 @@ def get_alignment_data(alignment_file): # Try to read the file as a Guppy alignment summary file read_ids = t['read_id'] accuracies = t['alignment_accuracy'] - alignment_lens = (t['alignment_strand_end'] - - t['alignment_strand_start']) + alignment_lens = (t['alignment_strand_end'] - + t['alignment_strand_start']) print("Interpreted alignment file as Guppy output") accuracies[accuracies < 0] = np.nan return read_ids, accuracies, alignment_lens @@ -157,10 +157,10 @@ def get_alignment_data(alignment_file): read_ids = t['query'] accuracies = t['accuracy'] # Query length in alignment not available directly in taiyaki summary - alignment_lens = (t['reference_end'] - - t['reference_start'] - + t['insertion'] - - t['deletion']) + alignment_lens = (t['reference_end'] - + t['reference_start'] + + t['insertion'] - + t['deletion']) print("Interpreted alignment file as Taiyaki output") return read_ids, accuracies, alignment_lens except ValueError: diff --git a/misc/plot_mapped_signals.py b/misc/plot_mapped_signals.py index 1ac9a52..6fae259 100755 --- a/misc/plot_mapped_signals.py +++ b/misc/plot_mapped_signals.py @@ -31,7 +31,7 @@ def get_parser(): help='Max number of reads to read from each file. Not used if ' + 'read_ids are given') parser.add_argument( - '--read_ids', nargs='+', default=[], + '--read_ids', nargs='+', default=[], help='One or more read_ids. 
If not present, plots the first ' + '[--nreads] in each file') parser.add_argument( @@ -58,7 +58,7 @@ def get_parser(): help='Do not display status messages.') parser.add_argument( - 'mapped_signal_files', nargs='+', + 'mapped_signal_files', nargs='+', help='Inputs: one or more mapped signal files') return parser diff --git a/misc/plot_training.py b/misc/plot_training.py index 160aae2..1725270 100755 --- a/misc/plot_training.py +++ b/misc/plot_training.py @@ -45,7 +45,7 @@ def get_parser(): parser.add_argument( 'output', help='Output png file') parser.add_argument( - 'input_directories', nargs='+', + 'input_directories', nargs='+', help='One or more directories containing {} and {} files'.format( BATCH_LOG_FILENAME, VAL_LOG_FILENAME)) diff --git a/taiyaki/basecall_helpers.py b/taiyaki/basecall_helpers.py index ee15d29..44f0a09 100755 --- a/taiyaki/basecall_helpers.py +++ b/taiyaki/basecall_helpers.py @@ -76,8 +76,8 @@ def stitch_chunks(out, chunk_starts, chunk_ends, stride, path_stitching=False): # middle chunks for i in range(1, nchunks - 1): start = (chunk_ends[i - 1] - chunk_starts[i]) // (2 * stride) - end = (chunk_ends[i] + chunk_starts[i + 1] - - 2 * chunk_starts[i]) // (2 * stride) + end = (chunk_ends[i] + chunk_starts[i + 1] - + 2 * chunk_starts[i]) // (2 * stride) if path_stitching: start += 1 end += 1 diff --git a/taiyaki/common_cmdargs.py b/taiyaki/common_cmdargs.py index 9671b9e..1d6e368 100755 --- a/taiyaki/common_cmdargs.py +++ b/taiyaki/common_cmdargs.py @@ -38,7 +38,7 @@ def add_common_command_args(parser, arglist): help='Integer specifying which GPU to use, or "cpu" to use CPU ' + 'only. 
Other accepted formats: "cuda" (use default GPU), "cuda:2" ' 'or "cuda2" (use GPU 2).')), - ('eps', lambda: parser.add_argument( + ('eps', lambda: parser.add_argument( '--eps', default=1e-6, metavar='adjustment', type=Positive(float), help='Small value to stabilise optimiser')), ('filter_max_dwell', lambda: parser.add_argument( diff --git a/taiyaki/flipflop_remap.py b/taiyaki/flipflop_remap.py index 9b19976..29d99da 100644 --- a/taiyaki/flipflop_remap.py +++ b/taiyaki/flipflop_remap.py @@ -132,8 +132,8 @@ def flipflop_remap(transition_scores, sequence, alphabet=DEFAULT_ALPHABET, bases = np.array([alphabet.find(b) for b in sequence]) flops = flipflopfings.flopmask(bases) - stay_index = np.where(flops, bases + (2 * nbase + 1) - * nbase, bases + 2 * nbase * bases) + stay_index = np.where(flops, bases + (2 * nbase + 1) * + nbase, bases + 2 * nbase * bases) from_base = (bases + flops * nbase)[:-1] to_base = np.maximum(bases, nbase * flops)[1:] step_index = from_base + 2 * nbase * to_base diff --git a/test/unit/test_ctc_loss.py b/test/unit/test_ctc_loss.py index 24e7c6b..e543e23 100644 --- a/test/unit/test_ctc_loss.py +++ b/test/unit/test_ctc_loss.py @@ -48,10 +48,10 @@ def setUpClass(self): paths = {} weights = {} - paths['015'] = [0, 0, 1, 5, 5] - weights['015'] = [1.0, 1.0, 0.5, 1.0] + paths['015'] = [0, 0, 1, 5, 5] + weights['015'] = [1.0, 1.0, 0.5, 1.0] - paths['237'] = [2, 2, 3, 7, 7] + paths['237'] = [2, 2, 3, 7, 7] weights['237'] = [1.0, 0.5, 1.0, 1.0] weights['510'] = [0.0] # No weight for this sequence/path