|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# @Time : 9/13/2021 1:03 AM |
| 4 | +# @Author : Runsheng |
| 5 | +# @File : megalodon_pair.py |
| 6 | +""" |
| 7 | +Assume the dir has the following str (paired samples): |
| 8 | +D0: |
| 9 | + modified_bases.5mC.bed (prefix.bed) |
| 10 | +D10: |
| 11 | + modified_bases.5mC.bed |
| 12 | +
|
| 13 | +need to return the intermediate files for further plotting |
| 14 | +and the plotting for the current sample: dotplot, and lineplot for three |
| 15 | +""" |
| 16 | +import pandas |
| 17 | +import os |
| 18 | +import subprocess |
| 19 | +import sys |
| 20 | +import signal |
| 21 | +import fnmatch |
| 22 | + |
| 23 | +###### utils |
| 24 | +def myexe(cmd, timeout=0): |
| 25 | + """ |
| 26 | + a simple wrap of the shell |
| 27 | + mainly used to run the bwa mem mapping and samtool orders |
| 28 | + """ |
| 29 | + def setupAlarm(): |
| 30 | + signal.signal(signal.SIGALRM, alarmHandler) |
| 31 | + signal.alarm(timeout) |
| 32 | + |
| 33 | + def alarmHandler(signum, frame): |
| 34 | + sys.exit(1) |
| 35 | + |
| 36 | + proc=subprocess.Popen(cmd, shell=True, preexec_fn=setupAlarm, |
| 37 | + stdout=subprocess.PIPE, stderr=subprocess.PIPE,cwd=os.getcwd()) |
| 38 | + out, err=proc.communicate() |
| 39 | + print(err) |
| 40 | + return out, err, proc.returncode |
| 41 | + |
| 42 | + |
| 43 | +def myglob(seqdir, word): |
| 44 | + """ |
| 45 | + to write a glob for python2 for res-glob |
| 46 | + """ |
| 47 | + matches=[] |
| 48 | + for root, dirnames, filenames in os.walk(seqdir): |
| 49 | + for filename in fnmatch.filter(filenames, word): |
| 50 | + matches.append(os.path.join(root, filename)) |
| 51 | + return matches |
| 52 | +#### utils end |
| 53 | + |
| 54 | +def sum_5mc_ratio(d0): |
| 55 | + """ |
| 56 | + d0 is a df from the megalodon bed dfile |
| 57 | + print is (methylated C number, unmethylated C number) |
| 58 | + return methylated C number |
| 59 | + """ |
| 60 | + return (d0[10] / 100 * d0[9]).sum() / d0[9].sum() |
| 61 | + |
| 62 | + |
| 63 | +def sum_inter_promoter(in_filename, out_filename): |
| 64 | + """ |
| 65 | + return the gene:mean propotion in promoter info |
| 66 | + """ |
| 67 | + wcma_d0_inter=pandas.read_csv(in_filename, sep="\t", header=None) |
| 68 | + df=wcma_d0_inter.groupby([4])[15].mean() |
| 69 | + df.to_csv(out_filename, header=False) |
| 70 | + |
| 71 | + |
| 72 | +def main(): |
| 73 | + |
| 74 | + os.chdir("/data/aml") |
| 75 | + # fpr TTK |
| 76 | + d0 = pandas.read_csv("./TTK/D0/modified_bases.5mC.bed", sep="\t", header=None) |
| 77 | + d10 = pandas.read_csv("./TTK/D10/modified_bases.5mC.bed", sep="\t", header=None) |
| 78 | + for i in [d0, d10]: |
| 79 | + print(sum_5mc_ratio(i)) |
| 80 | + |
| 81 | + bedtools_cmd=""" |
| 82 | + bedtools intersect -a /data/aml/ref/promoter.bed -b /data/aml/TTK/D0/modified_bases.5mC.bed -wa -wb > /data/aml/TTK/D0/TTK_D0_5mc_inter.bed & |
| 83 | + bedtools intersect -a /data/aml/ref/promoter.bed -b /data/aml/TTK/D10/modified_bases.5mC.bed -wa -wb > /data/aml/TTK/D10/TTK_D10_5mc_inter.bed |
| 84 | + """ |
| 85 | + myexe(bedtools_cmd) |
| 86 | + |
| 87 | + sum_inter_promoter(in_filename="/data/aml/TTK/D0/TTK_D0_5mc_inter.bed", out_filename="/data/aml/TTK/D0/TTK_D0_5mc_promoter.csv") |
| 88 | + sum_inter_promoter(in_filename="/data/aml/TTK/D10/TTK_D10_5mc_inter.bed", out_filename="/data/aml/TTK/D10/TTK_D10_5mc_promoter.csv") |
| 89 | + |
| 90 | +if __name__ == "__main__": |
| 91 | + main() |
0 commit comments