Skip to content

Commit e61bdae

Browse files
committed
add pararun
pararun runs a command once for every file that shares a given suffix, substituting each file's prefix into the command
1 parent 62ac02a commit e61bdae

File tree

2 files changed

+259
-0
lines changed

2 files changed

+259
-0
lines changed

gbrowser_script/maker_re.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#! /usr/bin/env python
2+
import argparse
3+
4+
def gff_to_list(gfffile):
    """
    Read a GFF file and return its feature lines.

    Lines starting with "#" (comments / directives) and lines of 18
    characters or fewer (blank or truncated records) are dropped. The kept
    lines are returned unchanged, trailing newline included.

    :param gfffile: path of the GFF file to read
    :return: list of raw feature lines, in file order
    """
    with open(gfffile, "r") as f:
        # Iterate the handle directly: the file is streamed instead of being
        # fully materialised by readlines() before filtering.
        return [line for line in f
                if not line.startswith("#") and len(line) > 18]
14+
15+
16+
def name_to_list(filename):
    """
    Read the gene names to keep, one per line.

    the name is like Bp_scaf_6090-3.8
    Bp_scaf_6090 is contig name
    3.8 is the mRNA name

    Surrounding whitespace is stripped from every name and blank lines are
    skipped, so they do not turn into empty-string names.

    :param filename: path of the name file
    :return: list of names, in file order
    """
    with open(filename) as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]
28+
29+
30+
def _attribute_value(attributes, key):
    """Return the value of the first ``key=value`` attribute, or None."""
    prefix = key + "="
    for attribute in attributes:
        if attribute.startswith(prefix):
            return attribute[len(prefix):]
    return None


def gene_format(gene_l):
    """
    format the gff line to blocks,
    {gene name, [geneline, mRNAline, cdsline...]}

    the gff need to be sorted and all gene/mRNA/exon/cds/UTR is together

    A "gene" record opens a new block keyed by its ID attribute. Every
    following record whose Parent attribute contains that ID is appended to
    the block. The match is a substring test on purpose, so that exon/CDS
    records whose parent is the mRNA ID "<gene id>-mRNA-1" are kept too.

    Records that come before the first gene, records without a matching
    Parent, and lines with fewer than 9 tab-separated columns are ignored.

    :param gene_l: list of raw GFF feature lines
    :return: dict mapping gene ID to the list of raw lines of that gene
    """
    gene_d = {}
    current = None  # ID of the gene block currently being filled
    for line in gene_l:
        fields = line.split("\t")
        if len(fields) < 9:
            # not a complete GFF record; there is no attribute column to read
            continue

        gff_type = fields[2]
        attributes = [a.strip() for a in fields[-1].strip().split(";")]

        if gff_type == "gene":
            gene_id = _attribute_value(attributes, "ID")
            if gene_id is not None:
                current = gene_id
                gene_d[current] = [line]
        elif current is not None:
            parent = _attribute_value(attributes, "Parent")
            if parent is not None and current in parent:
                gene_d[current].append(line)
    return gene_d
59+
60+
61+
def gene_select(gene_d, name_l):
    """
    select by key of gene_d

    A key such as "maker-Bp_scaf_6090-exonerate_est2genome-gene-3.8" is
    reduced to "<2nd dash field>-<last dash field>" ("Bp_scaf_6090-3.8") and
    kept when that short name is listed in name_l. Keys that contain no "-"
    cannot be reduced and are skipped.

    :param gene_d: dict of gene ID -> list of GFF lines
    :param name_l: iterable of short names to keep
    :return: new dict holding only the selected entries of gene_d
    """
    name_set = set(name_l)
    gff_new = {}
    for key, lines in gene_d.items():
        parts = key.split("-")
        if len(parts) < 2:
            continue
        if parts[1] + "-" + parts[-1] in name_set:
            gff_new[key] = lines
    return gff_new
72+
73+
74+
def gene_name_replace(gff_d):
    """
    replace the long name to short one,
    from "maker-Bp_scaf_46564-exonerate_est2genome-gene-0.0" to "Bp_scaf_46564-0.0"

    The long name is the part of the key after a "type:" prefix when the key
    has one (e.g. "gene:maker-..."), otherwise the whole key. The short name
    is "<2nd dash field>-<last dash field>" of the key. Every occurrence of
    the long name in the gene's lines is replaced by the short name.

    The keys of the returned dict are the original (long) keys. Entries whose
    key contains no "-" are passed through unchanged.

    :param gff_d: dict of gene ID -> list of GFF lines
    :return: new dict with the same keys and renamed lines
    """
    gff_namere = {}
    for key, lines in gff_d.items():
        dash_parts = key.split("-")
        if len(dash_parts) < 2:
            # no short form can be derived from this key
            gff_namere[key] = list(lines)
            continue

        colon_parts = key.split(":")
        long_name = colon_parts[1] if len(colon_parts) > 1 else key
        short_name = dash_parts[1] + "-" + dash_parts[-1]
        gff_namere[key] = [line.replace(long_name, short_name)
                           for line in lines]
    return gff_namere
95+
96+
97+
def gff_write(gff_d, out):
    """
    Write every line of every gene block to a file.

    Blocks are written in the iteration order of gff_d; lines are written
    verbatim (they are expected to carry their own newline).

    :param gff_d: dict of gene ID -> list of GFF lines
    :param out: path of the output file (overwritten if it exists)
    """
    # "with" guarantees the handle is closed even when a write fails
    with open(out, "w") as fw:
        for lines in gff_d.values():
            fw.writelines(lines)
109+
110+
111+
112+
if __name__ == "__main__":
    # Command-line entry point: keep only the genes listed in the name file
    # and write them, with shortened names, to a new GFF file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--gff_in", required=True,
                        help="the sorted maker gff file")
    parser.add_argument("--namefile", required=True,
                        help="the genes you want to keep in new gff file")
    parser.add_argument("--gff_out", required=True,
                        help="the name of output gff file")
    args = parser.parse_args()

    try:
        gene_l = gff_to_list(args.gff_in)
        name_l = name_to_list(args.namefile)
    except (IOError, OSError):
        # the original raised the undefined name IOERROR (a NameError)
        raise IOError("Error occurred, please make sure the gff and name file is in your path")

    gene_d = gene_format(gene_l)
    gff_new = gene_select(gene_d, name_l)
    gff_re = gene_name_replace(gff_new)

    # print(...) with one parenthesised argument works on Python 2 and 3
    print("The old gff file contains %d genes and the new gff file contains %d genes."
          % (len(gene_d), len(gff_new)))

    try:
        gff_write(gff_re, args.gff_out)
    except (IOError, OSError):
        # SystemExit with a message prints it to stderr and exits non-zero,
        # so a failed write is visible to the calling shell
        raise SystemExit("Error occurred, please check you have write permission to your disk")

pararun.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# @Time : 8/3/2020 4:37 PM
4+
# @Author : Runsheng
5+
# @File : pararun.py
6+
7+
"""
8+
A general parallel runner for commands that take a single input file/parameter
9+
"""
10+
11+
from __future__ import print_function
12+
import os
13+
import argparse
14+
import subprocess
15+
from glob import glob
16+
import logging
17+
import sys
18+
import signal
19+
import multiprocessing
20+
21+
22+
def myexe(cmd, timeout=0):
    """
    a simple wrap of the shell
    mainly used to run the bwa mem mapping and samtool orders

    :param cmd: command line, run through the shell
    :param timeout: seconds before the shell process is sent SIGALRM;
                    0 (default) means no time limit
    :return: tuple (stdout, stderr, returncode) with the captured output
    """
    def setupAlarm():
        # Runs in the child between fork() and exec(). A Python-level handler
        # would be discarded by exec(), so reset SIGALRM to its default
        # action (terminate) and arm the timer, which does survive exec().
        signal.signal(signal.SIGALRM, signal.SIG_DFL)
        signal.alarm(timeout)

    proc = subprocess.Popen(cmd, shell=True,
                            preexec_fn=setupAlarm if timeout else None,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if err:
        # only echo stderr when the command actually produced some
        print(err)
    return out, err, proc.returncode
39+
40+
41+
class _WorkerFailure(object):
    """Marker a worker sends back in place of a result when ``f`` raised."""

    def __init__(self, detail):
        self.detail = detail


def parmap(f, X, nprocs=multiprocessing.cpu_count()):
    """
    a function to use mutip map inside a function
    modified from stackoverflow, 3288595

    Items of X are handed to nprocs worker processes through a queue and the
    results are returned in the order of X.

    :param f: function of one argument, applied to every item of X
    :param X: iterable of inputs
    :param nprocs: core, if not given, use all core (values below 1 are
                   treated as 1, otherwise nothing would consume the queue)
    :return: list of f(x) for x in X, in input order
    :raises RuntimeError: if f raised for any item
    """
    nprocs = max(1, int(nprocs))
    q_in = multiprocessing.Queue(1)
    q_out = multiprocessing.Queue()

    workers = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
               for _ in range(nprocs)]
    for p in workers:
        p.daemon = True
        p.start()

    n_tasks = 0
    for i, x in enumerate(X):
        q_in.put((i, x))
        n_tasks += 1
    # one (None, None) sentinel per worker tells it to stop
    for _ in workers:
        q_in.put((None, None))

    res = [q_out.get() for _ in range(n_tasks)]

    for p in workers:
        p.join()

    # results arrive in completion order; restore the input order
    res.sort(key=lambda pair: pair[0])
    for i, x in res:
        if isinstance(x, _WorkerFailure):
            raise RuntimeError(
                "parmap: item %d failed in a worker: %s" % (i, x.detail))
    return [x for _, x in res]


def fun(f, q_in, q_out):
    """
    for parmap

    Worker loop: take (index, item) pairs from q_in until an index of None
    arrives, and put (index, f(item)) on q_out. If f raises, a
    _WorkerFailure marker is sent as the result instead, so the parent is
    never left waiting for a result that will not come.

    :param f: function applied to each item
    :param q_in: queue of (index, item) tasks
    :param q_out: queue receiving (index, result) pairs
    :return: None
    """
    while True:
        i, x = q_in.get()
        if i is None:
            break
        try:
            result = f(x)
        except Exception as exc:
            result = _WorkerFailure("%s: %s" % (type(exc).__name__, exc))
        q_out.put((i, result))
81+
82+
83+
def path_split(pathstring):
    """
    Return the file name of a "/"-separated path, cut at its first dot.

    e.g. "/data/sample.sorted.bam" -> "sample"; a name without a dot is
    returned whole.

    :param pathstring: path or bare file name
    :return: the part of the last path component before the first "."
    """
    laststring = pathstring.split("/")[-1]
    # split(".")[0] already returns the whole name when there is no dot, so
    # no separate branch is needed for that case
    return laststring.split(".")[0]
89+
90+
91+
def get_prefix(bamlist):
    """
    Map every path in bamlist to its prefix via path_split.

    :param bamlist: iterable of paths / file names
    :return: list of prefixes, in the same order
    """
    return [path_split(path) for path in bamlist]
97+
98+
99+
if __name__ == "__main__":
    # Command-line entry point: run one shell command per file found in the
    # folder, in parallel, with "prefix" in the command replaced by the
    # file's prefix.
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--folder", required=True,
                        help="the folder containing all files")
    parser.add_argument("-s", "--suffix", required=True,
                        help="the suffix of the files")
    parser.add_argument("-p", "--core", default=10, type=int,
                        help="the cores used")
    parser.add_argument("-c", "--cmd", default="ls",
                        help="the cmd to run, with prefix as prefix")

    args = parser.parse_args()

    # without a configured handler the INFO message below is never shown
    logging.basicConfig(level=logging.INFO)

    ## main run code
    # Resolve to an absolute path first: after os.chdir() a relative folder
    # would no longer be valid for the "cd" issued inside every command.
    seqdir = os.path.abspath(args.folder)
    os.chdir(seqdir)
    logging.info("Move to {dir}".format(dir=seqdir))
    file_s = glob("*." + args.suffix)
    prefix_l = get_prefix(file_s)

    def run_one(prefix):
        """Build and run the command for a single file prefix."""
        cmd_run = args.cmd.replace("prefix", prefix)
        cmd_new = "cd {seqdir}\n{cmd_run}".format(cmd_run=cmd_run,
                                                  seqdir=seqdir)
        print(cmd_new)
        return myexe(cmd_new)

    parmap(run_one, prefix_l, nprocs=int(args.core))
124+

0 commit comments

Comments
 (0)