|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# @Time : 10/20/2020 4:02 PM |
| 4 | +# @Author : Runsheng |
| 5 | +# @File : runiter.py |
| 6 | + |
| 7 | +""" |
| 8 | +A general runner for commands that require multiple rounds of iteration, such as genome polishing |
| 9 | +""" |
| 10 | + |
| 11 | +from __future__ import print_function |
| 12 | +import os |
| 13 | +import argparse |
| 14 | +import subprocess |
| 15 | +import logging |
| 16 | +import sys |
| 17 | +import signal |
| 18 | +from glob import glob |
| 19 | +import re |
| 20 | + |
| 21 | + |
def myexe(cmd, timeout=0):
    """
    Run ``cmd`` through the shell and capture what it writes.

    Mainly used to run the bwa mem mapping and samtools commands.

    :param cmd: shell command line. It is handed to ``subprocess.Popen`` with
        ``shell=True`` so pipes, ``&&`` and redirections work; the string is
        executed as-is, so it must come from a trusted source.
    :param timeout: number of seconds after which SIGALRM is delivered to the
        spawned shell; 0 (the default) disables the limit.
    :return: tuple ``(out, err, returncode)``; ``out`` and ``err`` are the
        captured stdout/stderr exactly as returned by ``communicate()``.
    """
    def _arm_alarm():
        # Runs in the child between fork and exec. A Python-level handler
        # cannot outlive the exec, so request the default SIGALRM action
        # (terminate the process) explicitly and start the countdown.
        signal.signal(signal.SIGALRM, signal.SIG_DFL)
        signal.alarm(timeout)

    # Only hook into the child when a limit was really requested: without a
    # timeout there is nothing to arm and preexec_fn is best avoided.
    preexec = _arm_alarm if timeout and timeout > 0 else None

    proc = subprocess.Popen(cmd, shell=True, preexec_fn=preexec,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            cwd=os.getcwd())
    out, err = proc.communicate()

    # Echo the command's stderr only when there is something to show.
    if err:
        if isinstance(err, str):
            # Python 2: already a native string.
            print(err)
        else:
            # Python 3: bytes; decode so the text is printed instead of a
            # b'...' repr, tolerating undecodable bytes.
            print(err.decode("utf-8", "replace"))
    return out, err, proc.returncode
| 39 | + |
def mylogger():
    """
    Configure the root logger for console output and return it.

    The root logger is set to INFO and gets one ``StreamHandler`` (also at
    INFO) that formats records as
    ``time - logger name - level - message``.

    The function may be called repeatedly: the console handler is attached
    only once, so records are never printed multiple times.

    :return: the configured root ``logging.Logger``.
    """
    logger = logging.getLogger('')
    logger.setLevel(logging.INFO)

    # The handler created here is tagged so a later call can recognise it;
    # handlers installed by other code are left alone and do not stop ours
    # from being added.
    already_attached = any(getattr(handler, "_runiter_console", False)
                           for handler in logger.handlers)
    if not already_attached:
        # console handler, level INFO
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        ch.setFormatter(formatter)
        ch._runiter_console = True
        logger.addHandler(ch)

    return logger
| 57 | + |
| 58 | + |
def get_file_round(wkdir=None, key1="round1"):
    """
    Find the highest round number present in the file names of ``wkdir``.

    The round key is ``key1`` with every "1" removed (``"round1"`` gives
    ``"round"``); every file whose name contains the key is scanned for
    ``<key><digits>`` and the largest number found is returned.

    Note: the function changes the process working directory to ``wkdir``
    and prints the list of matching file names.

    :param wkdir: directory to scan; defaults to the current directory.
    :param key1: the round-1 indicator used in the command line.
    :return: the largest round number found, or 0 when no file name carries
        a round number.
    """
    if wkdir is None:
        wkdir = os.getcwd()
    os.chdir(wkdir)
    key = key1.replace("1", "")

    file_name_l = glob("*" + key + "*")
    print(file_name_l)

    # Escape the key so regex metacharacters in it are matched literally, and
    # take the whole run of digits so that round 12 is not read as round 1.
    round_pattern = re.compile(re.escape(key) + r"([0-9]+)")
    num_l = []
    for filename in file_name_l:
        num_l.extend(int(digits) for digits in round_pattern.findall(filename))

    # A file can contain the key without a number after it, so the decision
    # is based on the numbers actually found rather than on the file list.
    if not num_l:
        return 0
    return max(num_l)
| 77 | + |
def generate_one(cmd, round, key0="round0", key1="round1"):
    """
    Build the command line for one iteration round.

    Every ``key1`` in ``cmd`` is replaced with ``<key><round>`` and every
    ``key0`` with ``<key><round - 1>``, where ``<key>`` is ``key1`` with every
    "1" removed (``"round1"`` gives ``"round"``).

    :param cmd: command template containing both ``key0`` and ``key1``.
    :param round: the round number to generate the command for.
    :param key0: the key with a 0 inside the string, marking the round-1 input.
    :param key1: the key with a 1 inside the string, marking the round-1
        output / round-2 input.
    :return: the command line for the requested round.
    :raises KeyError: when ``key0`` or ``key1`` does not occur in ``cmd``.
    """
    if key0 not in cmd or key1 not in cmd:
        raise KeyError("Keys used to indicate round do not fit the round0/round1 format")

    key = key1.replace("1", "")
    # key1 is stored last so that it wins if both keys are the same string,
    # matching the precedence of replacing key1 first.
    replacement = {key0: key + str(round - 1)}
    replacement[key1] = key + str(round)

    # Substitute both placeholders in a single pass: text produced for one
    # placeholder is never rescanned for the other, so the result cannot be
    # corrupted when a replacement happens to contain the other key. Longer
    # keys are tried first so a key that is a prefix of the other cannot
    # shadow it.
    ordered_keys = sorted(replacement, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(k) for k in ordered_keys))
    return pattern.sub(lambda match: replacement[match.group(0)], cmd)
| 97 | + |
if __name__ == "__main__":
    # Command-line entry point: expand the command template once per round
    # and run the rounds in order, stopping at the first failure.

    example_text = '''example:
    ### example to run the runiter with 4 times
    runiter.py -r 4 -0 round0 -1 round1 -c "minimap2 -ax asm20 -t 48 ref_round0.fa ../polish/slr/merge.fq > aln_round1.sam && racon -u -t 48 ../polish/slr/merge.fq aln_round1.sam ref_round0.fa > ref_round1.fa"

    '''
    parser = argparse.ArgumentParser(prog='runiter',
                                     description='runiter',
                                     epilog=example_text,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("-r", "--round", type=int, default=5, help="how many round will this cmd runs")
    parser.add_argument("-c", "--cmd", default="echo test round0 round1", help="the cmd line to be run, with round0 and round1 indicating the iter items")
    parser.add_argument("-0", "--key0", default="round0", help="the indicator of the key for iter0, need to have a 0 inside")
    parser.add_argument("-1", "--key1", default="round1", help="the indicator of the key for iter1, need to have a 1 inside")
    parser.add_argument("--resume", default="no", help="try to resume the former run by find the max round")

    args = parser.parse_args()

    logger = mylogger()

    start_round = 1

    if args.resume == "yes":
        # Look for earlier output using the same key the command uses, so
        # resuming also works with a custom --key1.
        n = get_file_round(key1=args.key1)
        start_round = n + 1
        logger.info("found round{} output, start from round{}".format(n, start_round))

    for i in range(start_round, args.round + 1):
        cmd_one = generate_one(args.cmd, i, key0=args.key0, key1=args.key1)
        logger.info("running round {}".format(i))
        logger.info("running {}".format(cmd_one))
        out, err, returncode = myexe(cmd_one)
        if returncode != 0:
            # Each round consumes the output of the previous one, so carrying
            # on after a failure would only run on missing or broken input.
            logger.error("round {} failed with exit code {}, stop iterating".format(i, returncode))
            # A negative code means the command was killed by a signal;
            # report that as a generic failure status.
            sys.exit(returncode if returncode > 0 else 1)
        logger.info("##########finished round {} ##########".format(i))
0 commit comments