Skip to content
This repository has been archived by the owner on Mar 24, 2021. It is now read-only.

Commit

Permalink
Merge branch 'development' into AGR-367-ribbon-rev2
Browse files (browse the repository at this point in the history)
  • Loading branch information
adamjohnwright authored May 29, 2017
2 parents e41e74a + 3051f89 commit 0eaee80
Showing 1 changed file with 14 additions and 13 deletions.
27 changes: 14 additions & 13 deletions indexer/src/mods/human.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@ def load_genes(self, batch_size, test_set):

     def load_go(self):
         path = "tmp"
-        S3File("mod-datadumps", "Human_GO_2_23_2017.tsv", path).download()
-        go_data = CSVFile(path + "/Human_GO_2_23_2017.tsv").get_data()
+        S3File("mod-datadumps/GO/ANNOT", "gene_association.human.gz", path).download()
         go_annot_dict = {}
-        for row in go_data:
-            go_terms = map(lambda s: s.strip(), row[1].split(","))
-            for term in go_terms:
+        with gzip.open(path + "/gene_association.human.gz", 'rb') as file:
+            reader = csv.reader(file, delimiter='\t')
+            for row in reader:
                 gene = row[0]
-                if gene in go_annot_dict:
-                    go_annot_dict[gene]['go_id'].append(term)
-                else:
-                    go_annot_dict[gene] = {
-                        'gene_id': gene,
-                        'go_id': [term],
-                        'species': Human.species
-                    }
+                go_terms = map(lambda s: s.strip(), row[1].split(","))
+                for term in go_terms:
+                    if gene in go_annot_dict:
+                        go_annot_dict[gene]['go_id'].append(term)
+                    else:
+                        go_annot_dict[gene] = {
+                            'gene_id': gene,
+                            'go_id': [term],
+                            'species': Human.species
+                        }
         return go_annot_dict

def load_diseases(self):
Expand Down

0 comments on commit 0eaee80

Please sign in to comment.