forked from galaxyproject/galaxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_builds.py
52 lines (43 loc) · 1.4 KB
/
parse_builds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python
"""
Connects to the URL specified and outputs builds available at that
DSN in tabular format. UCSC Main gateway is used as default.
Each row is written as: build<TAB>description (build)
"""
import sys
import xml.etree.ElementTree as ElementTree
import requests
def getbuilds(url, timeout=60):
    """
    Yield ``[build, description]`` pairs parsed from the XML served at ``url``.

    This is a generator: nothing is fetched or printed until iteration starts.
    Before the first pair is yielded, a ``#``-prefixed status line and the
    "unspecified" placeholder row are printed to stdout.

    Each child of the XML root must contain a ``SOURCE`` element carrying an
    ``id`` attribute (used as the build). Its ``DESCRIPTION`` text, if any,
    has the "Genome at UCSC" suffix removed and runs of identical adjacent
    words collapsed into a single word.

    :param url: address to request the XML document from.
    :param timeout: seconds handed to ``requests.get`` as its timeout, so an
        unresponsive server cannot stall the script indefinitely.
    :returns: generator of two-item lists ``[build, description]``.
    :raises SystemExit: with status 1, after printing a ``#`` comment line
        and the placeholder row, when the request fails or the response
        cannot be parsed as XML.
    """
    try:
        text = requests.get(url, timeout=timeout).text
    except Exception:
        print("#Unable to open " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    try:
        tree = ElementTree.fromstring(text)
    except Exception:
        print("#Invalid xml passed back from " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    print("#Harvested from " + url)
    print("?\tunspecified (?)")
    for dsn in tree:
        build = dsn.find("SOURCE").attrib["id"]
        # findtext() gives "" for a missing or empty DESCRIPTION element,
        # where .find(...).text would be None and break the .replace() calls.
        description = dsn.findtext("DESCRIPTION", default="")
        description = description.replace(" - Genome at UCSC", "").replace(" Genome at UCSC", "")

        # Collapse runs of the same adjacent word by building a new list;
        # popping from the list while indexing it by a precomputed range
        # overruns the shortened list and misses runs of three or more.
        words = []
        for word in description.split(" "):
            if not words or words[-1] != word:
                words.append(word)
        yield [build, " ".join(words)]
if __name__ == "__main__":
    # An optional first command-line argument overrides the default address.
    URL = sys.argv[1] if len(sys.argv) > 1 else "http://genome.cse.ucsc.edu/cgi-bin/das/dsn"
    # One tab-separated row per build; the build id is repeated in
    # parentheses after the description.
    for build_id, description in getbuilds(URL):
        print("{0}\t{1} ({0})".format(build_id, description))