-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsubsetPhy.py
executable file
·129 lines (115 loc) · 3.1 KB
/
subsetPhy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/python
import getopt
import sys
import os
#Object to parse command-line arguments
class parseArgs():
    """Parse and validate the command-line options of subsetPhy.py.

    Options are read from ``sys.argv[1:]`` with ``getopt``. After
    construction the parsed values are available as attributes:

    xml -- value of -x/--xml, or None when not given
    phy -- value of -p/--phy (mandatory)
    tax -- value of -l/--list (mandatory)
    out -- value of -o/--out, "out.phy" when not given

    The process exits through display_help() when -h/--help is given
    (status 0), and when an option is invalid or a mandatory option is
    missing (status 1).
    """

    def __init__(self):
        #Define options
        try:
            options, _ = getopt.getopt(
                sys.argv[1:], 'x:p:l:o:h',
                ["xml=", "phy=", "list=", "out=", "help"])
        except getopt.GetoptError as err:
            print(err)
            self.display_help(
                "\nExiting because getopt returned non-zero exit status.",
                exit_code=1)

        #Default values for params
        self.xml = None
        self.phy = None
        self.tax = None
        self.out = "out.phy"

        #Help takes precedence over every other option, so look for it first
        if any(flag in ("-h", "-help", "--help") for flag, _ in options):
            self.display_help("Exiting because help menu was called.")

        #Second pass to set all args.
        for opt, arg_raw in options:
            # NOTE: every space is removed from the value, not only leading
            # and trailing ones, so paths containing spaces are not supported.
            arg = arg_raw.replace(" ", "").strip()
            opt = opt.replace("-", "")
            if opt in ('x', 'xml'):
                self.xml = arg
            elif opt in ('h', 'help'):
                pass
            elif opt in ('p', 'phy'):
                self.phy = arg
            elif opt in ('l', 'list'):
                self.tax = arg
            elif opt in ('o', 'out'):
                self.out = arg
            else:
                # Explicit exit instead of `assert`, which is stripped
                # when Python runs with -O.
                self.display_help("Unhandled option %r" % opt, exit_code=1)

        #Check mandatory options are set
        if not self.phy:
            self.display_help("INPUT ERROR: No PHYLIP provided", exit_code=1)
        if not self.tax:
            self.display_help("INPUT ERROR: No TAXON LIST provided",
                              exit_code=1)

    def display_help(self, message=None, exit_code=0):
        """Print an optional message followed by the help menu, then exit.

        message   -- text printed above the help menu; nothing extra is
                     printed when it is None
        exit_code -- status passed to sys.exit(); 0 for a requested help
                     menu, non-zero when an error is being reported

        Always raises SystemExit.
        """
        if message is not None:
            print()
            print(message)
        print("\nsubsetPhy.py\n")
        print("Contact:\n\n\tTyler K. Chafin\n\tUniversity of Arkansas\n\ttkchafin@uark.edu\n")
        print("\nUsage:\n\t", sys.argv[0], "-p </path/to/phylip> -l </path/to/.txt>\n")
        print("Description:\n")
        print("\tsubsetPhy.py is a quickly written and shitty script to help manipulate phylip files\n")
        print(
            "\n\tInput options:\n"
            "\t\t-p,--phy : Phylip file\n"
            "\t\t-l,--list : .txt file containing a list of taxa to subset\n"
            "\t\t-o,--out : (Optional) output file name [default:out.phy]\n"
            "\t\t-h,--help : Displays help menu"
        )
        print()
        sys.exit(exit_code)
################################# MAIN #########################################
params = parseArgs()

# Load the taxon list: one name per line, blank lines ignored.
# fullnames keeps each name with its spaces removed; taxlist keeps only the
# part after the last underscore, which is what gets compared against the
# sample names in the PHYLIP file.
taxlist = []
fullnames = []
with open(params.tax) as tax_handle:
    for raw in tax_handle:
        entry = raw.strip()
        if entry:
            entry = entry.replace(" ", "")
            fullnames.append(entry)
            taxlist.append(entry.rsplit("_", 1)[-1])
#Read phylip file
# Map each short taxon label to its full name once, so every PHYLIP row costs
# a single dict lookup instead of two scans of the list. setdefault keeps the
# first entry for a repeated label, matching what list.index() returned.
name_for = {}
for short_label, full_name in zip(taxlist, fullnames):
    name_for.setdefault(short_label, full_name)

# data maps full taxon name -> sequence for every retained sample;
# numSites is the sequence length shared by the retained samples.
data = {}
numSites = None
with open(params.phy) as phylip:
    # Blank lines are ignored; each remaining line is split on whitespace.
    rows = (raw.split() for raw in phylip if raw.strip())
    next(rows, None)  # the first non-blank line is the header: skip it
    for fields in rows:
        sample = fields[0]
        if sample not in name_for:
            continue
        if len(fields) < 2:
            sys.exit("ERROR: No sequence found for sample - " + sample)
        sequence = fields[1]
        if numSites is None:
            numSites = len(sequence)
        elif len(sequence) != numSites:
            sys.exit("ERROR: Samples do not have the same sequence length - "
                     + sample)
        data[name_for[sample]] = sequence
#Open output file
# Refuse to write a PHYLIP file with no samples: its header would carry no
# valid site count. Exit before the output file is created.
if not data:
    sys.exit("ERROR: None of the taxa in %s were found in %s"
             % (params.tax, params.phy))

with open(params.out, "w") as out_handle:
    # Header line: "<number of samples> <number of sites>"
    out_handle.write(str(len(data)) + " " + str(numSites) + "\n")
    # One sample per line: name, a tab, then the sequence.
    out_handle.writelines(
        name + "\t" + sequence + "\n" for name, sequence in data.items()
    )