Skip to content

Commit 1af2a11

Browse files
committed
add new
1 parent f82213a commit 1af2a11

File tree

7 files changed

+217
-1
lines changed

7 files changed

+217
-1
lines changed

fasta_dedup.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# @Time : 9/15/2021 12:09 AM
4+
# @Author : Runsheng
5+
# @File : fasta_dedup.py
6+
"""
7+
Used to merge identical fasta entries, and to merge selected keywords from their names into the representative name
8+
The names are in NCBI format and are expected to include the position and year as the strain name
9+
The resulting file will be used in tree making
10+
"""
11+
from collections import OrderedDict
12+
13+
14+
def read_fasta_to_dic(filename):
    """
    Read a fasta file into an ordered {name: sequence} dictionary.

    :param filename: path of the fasta file to read
    :return: OrderedDict in file order; each key is the full header line
        without the leading ">" and surrounding whitespace, each value is the
        record's sequence with all of its lines joined into one string

    Blank lines are skipped, and sequence lines that appear before the first
    header are ignored. If the same header occurs more than once, the later
    record replaces the sequence stored for the earlier one.
    """
    records = OrderedDict()
    full_name = None
    with open(filename, "r") as f:
        # Iterate the file object directly: looping over
        # enumerate(f.readlines()) yields (index, line) tuples, so calling
        # .startswith() on each item raised AttributeError.
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                full_name = line[1:].strip()
                records[full_name] = []
            elif full_name is not None:
                records[full_name].append(line)

    # Join the collected chunks once per record instead of growing a string.
    fa_dic = OrderedDict()
    for name, chunks in records.items():
        fa_dic[name] = "".join(chunks)
    return fa_dic
21+
22+

get_near_ref.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from collections import OrderedDict
2222

2323

24-
2524
def myexe(cmd, timeout=0):
2625
"""
2726
a simple wrap of the shell

methy/megalodon_pair.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# @Time : 9/13/2021 1:03 AM
4+
# @Author : Runsheng
5+
# @File : megalodon_pair.py
6+
"""
7+
Assume the directory has the following structure (paired samples):
8+
D0:
9+
modified_bases.5mC.bed (prefix.bed)
10+
D10:
11+
modified_bases.5mC.bed
12+
13+
need to return the intermediate files for further plotting
14+
and the plotting for the current sample: dotplot, and lineplot for three
15+
"""
16+
import pandas
17+
import os
18+
import subprocess
19+
import sys
20+
import signal
21+
import fnmatch
22+
23+
###### utils
24+
def myexe(cmd, timeout=0):
    """
    Run a shell command and capture its output.

    :param cmd: command line handed to the shell (shell=True), so it must
        never be built from untrusted input
    :param timeout: seconds passed to signal.alarm() in the child process
        before the shell is started; 0 (the default) sets no alarm
    :return: tuple (stdout, stderr, returncode) taken from
        Popen.communicate() and Popen.returncode

    Non-empty stderr output is also echoed with print().
    """
    def alarmHandler(signum, frame):
        sys.exit(1)

    def setupAlarm():
        # Runs in the child between fork and exec.
        # NOTE(review): custom handlers are reset to the default action by
        # exec, so an expired alarm presumably terminates the shell with
        # SIGALRM rather than running alarmHandler -- confirm.
        signal.signal(signal.SIGALRM, alarmHandler)
        signal.alarm(timeout)

    # Only hook into the child when a timeout was requested: preexec_fn is
    # POSIX-only and is not needed for the default timeout=0 case.
    preexec = setupAlarm if timeout else None

    proc = subprocess.Popen(cmd, shell=True, preexec_fn=preexec,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            cwd=os.getcwd())
    out, err = proc.communicate()
    # Echo stderr only when there is something to show, so successful
    # commands no longer print an empty value.
    if err:
        print(err)
    return out, err, proc.returncode
41+
42+
43+
def myglob(seqdir, word):
    """
    Recursive glob that also works on python2.

    Walks seqdir with os.walk and returns a list with the joined path of
    every file whose name matches the fnmatch pattern word.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(seqdir)
        for name in fnmatch.filter(names, word)
    ]
52+
#### utils end
53+
54+
def sum_5mc_ratio(d0):
    """
    Return the weighted methylation ratio of a megalodon bed table.

    :param d0: DataFrame read from the megalodon bed file with header=None;
        column 10 is treated as a percentage and column 9 as its per-row
        weight (presumably percent modified and read coverage -- confirm
        against the megalodon bed layout)
    :return: sum(col10 / 100 * col9) / sum(col9) as a fraction, or NaN when
        the weights sum to zero (e.g. an empty table)
    """
    total = d0[9].sum()
    # State the zero-weight case explicitly instead of relying on a numpy
    # division by zero to produce NaN.
    if total == 0:
        return float("nan")
    methylated = (d0[10] / 100 * d0[9]).sum()
    return methylated / total
61+
62+
63+
def sum_inter_promoter(in_filename, out_filename, gene_col=4, value_col=15):
    """
    Write the per-gene mean of one column of an intersect table to a csv.

    :param in_filename: tab-separated file without a header line (main()
        passes the output of `bedtools intersect -wa -wb`)
    :param out_filename: csv file to write, one "gene,mean" line per group,
        without a header line
    :param gene_col: index of the column to group by (default 4)
    :param value_col: index of the column to average (default 15)
    :return: None; the result is only written to out_filename
    """
    table = pandas.read_csv(in_filename, sep="\t", header=None)
    per_gene = table.groupby(gene_col)[value_col].mean()
    per_gene.to_csv(out_filename, header=False)
70+
71+
72+
def main():
    """
    Process the paired TTK samples (D0 and D10) under /data/aml.

    Prints the value of sum_5mc_ratio() for each sample, intersects the
    promoter annotation with each sample's 5mC bed file using bedtools, and
    writes the per-gene promoter means with sum_inter_promoter().

    :raises RuntimeError: when a bedtools call exits with a non-zero status
    """
    os.chdir("/data/aml")
    # for TTK
    d0 = pandas.read_csv("./TTK/D0/modified_bases.5mC.bed", sep="\t", header=None)
    d10 = pandas.read_csv("./TTK/D10/modified_bases.5mC.bed", sep="\t", header=None)
    for i in [d0, d10]:
        print(sum_5mc_ratio(i))

    for day in ("D0", "D10"):
        sample_dir = "/data/aml/TTK/{0}".format(day)
        inter_bed = "{0}/TTK_{1}_5mc_inter.bed".format(sample_dir, day)
        promoter_csv = "{0}/TTK_{1}_5mc_promoter.csv".format(sample_dir, day)
        bedtools_cmd = (
            "bedtools intersect -a /data/aml/ref/promoter.bed "
            "-b {0}/modified_bases.5mC.bed -wa -wb > {1}"
        ).format(sample_dir, inter_bed)

        # Run each intersect on its own and check its exit status: with the
        # two commands chained by "&" the status of the first was never
        # visible, and a failure only surfaced later as a pandas read error.
        out, err, returncode = myexe(bedtools_cmd)
        if returncode != 0:
            raise RuntimeError(
                "bedtools intersect failed for {0} (exit status {1}): {2!r}".format(
                    day, returncode, err))

        sum_inter_promoter(in_filename=inter_bed, out_filename=promoter_csv)
89+
90+
if __name__ == "__main__":
91+
main()

methy/readme.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
### Aims:
2+
1. To process the megalodon results and extract the intermediate files
3+
2. To plot the Figure for each sample
4+
3. To use the promoter annotation file, converted from Ensembl or GenBank GFF

tutorials/Ecoli.png

705 KB
Loading

tutorials/figs/Ecoli_lenplot.svg

Lines changed: 100 additions & 0 deletions
Loading
File renamed without changes.

0 commit comments

Comments
 (0)