-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpipeline_package_fixtures.py
139 lines (92 loc) · 3.59 KB
/
pipeline_package_fixtures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from pathlib import Path
import pytest
from datasail.cluster.clustering import cluster
from datasail.reader.utils import read_csv, parse_fasta
from datasail.reader.read_molecules import read_molecule_data
from datasail.reader.read_proteins import read_protein_data
# Directory under which every data file referenced in this module lives.
# The path is relative, so it is resolved against the current working
# directory at the time a file is actually opened.
base = Path("data") / "rw_data"
def sabdab_inter_list():
    """Load ``sabdab_full/inter.tsv`` into a list.

    The file is read through ``read_csv`` with a tab character as the second
    argument.

    Returns:
        A new list holding every item ``read_csv`` produces for the file.
    """
    inter_file = base / "sabdab_full" / "inter.tsv"
    return [*read_csv(inter_file, "\t")]
def sabdab_inter_returner():
    """Build a zero-argument loader for ``sabdab_full/inter.tsv``.

    Nothing is read when this function itself is called. Every call of the
    returned callable reads the file again through ``read_csv`` (tab as the
    second argument) and hands back a fresh list.

    Returns:
        A callable taking no arguments and returning a list of the items
        produced by ``read_csv``.
    """
    return lambda: [
        *read_csv(base / "sabdab_full" / "inter.tsv", "\t")
    ]
def sabdab_inter_generator():
    """Yield the items of ``sabdab_full/inter.tsv`` one at a time.

    The items are delegated straight from ``read_csv`` (tab as the second
    argument) rather than being copied into a temporary list first; the
    yielded values and their order are the same.

    Yields:
        Each item produced by ``read_csv`` for the file, in order.
    """
    yield from read_csv(base / "sabdab_full" / "inter.tsv", "\t")
@pytest.fixture
def sabdab_ag_dataset():
    """Pytest fixture: clustered data built from ``sabdab_full/ag.fasta``.

    The FASTA path and the list of items read from ``sabdab_full/inter.tsv``
    are handed to ``read_protein_data``; its result is then passed to
    ``cluster`` with 50 clusters, a single thread, average linkage and
    ``Path()`` (the current directory) as log directory.

    Returns:
        Whatever ``cluster`` returns for that input.
    """
    sabdab_dir = base / "sabdab_full"
    fasta_file = sabdab_dir / "ag.fasta"
    inter_rows = list(read_csv(sabdab_dir / "inter.tsv", "\t"))
    # Arguments are positional on purpose: their meaning is defined by
    # read_protein_data's signature. The 0 presumably selects the first
    # column of the inter rows -- TODO confirm against the reader.
    ag_data = read_protein_data(
        fasta_file, None, None, None, None, inter_rows, 0, 50, ""
    )
    return cluster(
        ag_data,
        num_clusters=50,
        threads=1,
        logdir=Path(),
        linkage="average",
    )
@pytest.fixture
def sabdab_vh_dataset():
    """Pytest fixture: clustered data built from ``sabdab_full/vh.fasta``.

    The FASTA path and the list of items read from ``sabdab_full/inter.tsv``
    are handed to ``read_protein_data``; its result is then passed to
    ``cluster`` with 50 clusters, a single thread, average linkage and
    ``Path()`` (the current directory) as log directory.

    Returns:
        Whatever ``cluster`` returns for that input.
    """
    sabdab_dir = base / "sabdab_full"
    fasta_file = sabdab_dir / "vh.fasta"
    inter_rows = list(read_csv(sabdab_dir / "inter.tsv", "\t"))
    # Arguments are positional on purpose: their meaning is defined by
    # read_protein_data's signature. The 1 presumably selects the second
    # column of the inter rows -- TODO confirm against the reader.
    vh_data = read_protein_data(
        fasta_file, None, None, None, None, inter_rows, 1, 50, ""
    )
    return cluster(
        vh_data,
        num_clusters=50,
        threads=1,
        logdir=Path(),
        linkage="average",
    )
@pytest.fixture
def mave_dataset():
    """Pytest fixture: clustered data built from the ``mave`` files.

    ``read_protein_data`` receives the sequence FASTA path as first and the
    weights TSV path as second positional argument; its result is passed to
    ``cluster`` with 50 clusters, a single thread, average linkage and
    ``Path()`` (the current directory) as log directory.

    Returns:
        Whatever ``cluster`` returns for that input.
    """
    mave_dir = base / "mave"
    sequences_file = mave_dir / "mave_db_gold_standard_only_sequences.fasta"
    weights_file = mave_dir / "mave_db_gold_standard_weights.tsv"
    # Remaining arguments stay positional; their meaning is defined by
    # read_protein_data's signature.
    mave_data = read_protein_data(
        sequences_file, weights_file, None, None, None, None, None, 50, ""
    )
    return cluster(
        mave_data,
        num_clusters=50,
        threads=1,
        logdir=Path(),
        linkage="average",
    )
@pytest.fixture
def mibig_dataset():
    """Pytest fixture: clustered data built from ``mibig/compounds.tsv``.

    The TSV path is handed to ``read_molecule_data``; its result is passed to
    ``cluster`` with 50 clusters, a single thread, average linkage and
    ``Path()`` (the current directory) as log directory.

    Returns:
        Whatever ``cluster`` returns for that input.
    """
    compounds_file = base / "mibig" / "compounds.tsv"
    # Arguments stay positional; their meaning is defined by
    # read_molecule_data's signature.
    mibig_data = read_molecule_data(
        compounds_file, None, None, None, None, None, None, 50, ""
    )
    return cluster(
        mibig_data,
        num_clusters=50,
        threads=1,
        logdir=Path(),
        linkage="average",
    )
def mibig_dict():
    """Load ``mibig/compounds.tsv`` into a dictionary.

    The file is read through ``read_csv`` with a tab character as the second
    argument; each two-element item becomes one key/value entry.

    Returns:
        A new dict built from the pairs produced by ``read_csv``.
    """
    compounds_file = base / "mibig" / "compounds.tsv"
    return {key: value for key, value in read_csv(compounds_file, "\t")}
def mibig_returner():
    """Build a zero-argument loader for ``mibig/compounds.tsv``.

    Nothing is read when this function itself is called. Every call of the
    returned callable reads the file again through ``read_csv`` (tab as the
    second argument) and hands back a fresh dict of its pairs.

    Returns:
        A callable taking no arguments and returning that dict.
    """
    return lambda: {
        key: value
        for key, value in read_csv(base / "mibig" / "compounds.tsv", "\t")
    }
def mibig_generator():
    """Yield the items of ``mibig/compounds.tsv`` one at a time.

    The items are delegated straight from ``read_csv`` (tab as the second
    argument) rather than being copied into a temporary list first; the
    yielded values and their order are the same.

    Yields:
        Each item produced by ``read_csv`` for the file, in order.
    """
    yield from read_csv(base / "mibig" / "compounds.tsv", "\t")
def mave_weights_dict():
    """Load ``mave/mave_db_gold_standard_weights.tsv`` as a dict of floats.

    The file is read through ``read_csv`` with a tab character as the second
    argument. The first element of each pair becomes the key and the second
    element is converted with ``float``.

    Returns:
        A new dict mapping each first element to its float-converted second
        element.
    """
    weights_file = base / "mave" / "mave_db_gold_standard_weights.tsv"
    return {key: float(raw) for key, raw in read_csv(weights_file, "\t")}
def mave_weights_returner():
    """Build a zero-argument loader for the ``mave`` weights file.

    Nothing is read when this function itself is called. Every call of the
    returned callable reads ``mave/mave_db_gold_standard_weights.tsv`` again
    through ``read_csv`` (tab as the second argument) and returns a fresh
    dict whose values are converted with ``float``.

    Returns:
        A callable taking no arguments and returning that dict.
    """
    return lambda: {
        key: float(raw)
        for key, raw in read_csv(
            base / "mave" / "mave_db_gold_standard_weights.tsv", "\t"
        )
    }
def mave_weights_generator():
    """Yield ``(key, weight)`` pairs from the ``mave`` weights file.

    ``mave/mave_db_gold_standard_weights.tsv`` is read through ``read_csv``
    (tab as the second argument). The pairs are consumed directly from the
    reader rather than from a temporary list copy; the yielded values and
    their order are the same.

    Yields:
        A tuple of the first element of each pair and its second element
        converted with ``float``.
    """
    weights_file = base / "mave" / "mave_db_gold_standard_weights.tsv"
    for key, raw in read_csv(weights_file, "\t"):
        yield key, float(raw)
def mave_dict():
    """Parse ``mave/mave_db_gold_standard_only_sequences.fasta``.

    Returns:
        The object returned by ``parse_fasta`` for that file (a dict, going
        by this function's name -- confirm against ``parse_fasta``).
    """
    sequences_file = (
        base / "mave" / "mave_db_gold_standard_only_sequences.fasta"
    )
    return parse_fasta(sequences_file)
def mave_returner():
    """Build a zero-argument loader for the ``mave`` sequence FASTA file.

    Nothing is parsed when this function itself is called. Every call of the
    returned callable runs ``parse_fasta`` on
    ``mave/mave_db_gold_standard_only_sequences.fasta`` again.

    Returns:
        A callable taking no arguments and returning the ``parse_fasta``
        result.
    """
    return lambda: parse_fasta(
        base / "mave" / "mave_db_gold_standard_only_sequences.fasta"
    )
def mave_generator():
    """Yield the ``(key, value)`` items of the parsed ``mave`` FASTA file.

    ``parse_fasta`` is run on
    ``mave/mave_db_gold_standard_only_sequences.fasta`` and the ``.items()``
    of its result are yielded directly, without copying them into a
    temporary list first; the yielded pairs and their order are the same.

    Yields:
        Each ``(key, value)`` pair of the ``parse_fasta`` result.
    """
    sequences_file = (
        base / "mave" / "mave_db_gold_standard_only_sequences.fasta"
    )
    yield from parse_fasta(sequences_file).items()
def sabdab_ag_dict():
    """Parse ``sabdab_full/ag.fasta``.

    Returns:
        The object returned by ``parse_fasta`` for that file (a dict, going
        by this function's name -- confirm against ``parse_fasta``).
    """
    ag_file = base / "sabdab_full" / "ag.fasta"
    return parse_fasta(ag_file)
def sabdab_ag_returner():
    """Build a zero-argument loader for ``sabdab_full/ag.fasta``.

    Nothing is parsed when this function itself is called. Every call of the
    returned callable runs ``parse_fasta`` on the file again.

    Returns:
        A callable taking no arguments and returning the ``parse_fasta``
        result.
    """
    return lambda: parse_fasta(
        base / "sabdab_full" / "ag.fasta"
    )
def sabdab_ag_generator():
    """Yield the ``(key, value)`` items of the parsed ``sabdab_full/ag.fasta``.

    ``parse_fasta`` is run on the file and the ``.items()`` of its result are
    yielded directly, without copying them into a temporary list first; the
    yielded pairs and their order are the same.

    Yields:
        Each ``(key, value)`` pair of the ``parse_fasta`` result.
    """
    yield from parse_fasta(base / "sabdab_full" / "ag.fasta").items()
def sabdab_vh_dict():
    """Parse ``sabdab_full/vh.fasta``.

    Returns:
        The object returned by ``parse_fasta`` for that file (a dict, going
        by this function's name -- confirm against ``parse_fasta``).
    """
    vh_file = base / "sabdab_full" / "vh.fasta"
    return parse_fasta(vh_file)
def sabdab_vh_returner():
    """Build a zero-argument loader for ``sabdab_full/vh.fasta``.

    Nothing is parsed when this function itself is called. Every call of the
    returned callable runs ``parse_fasta`` on the file again.

    Returns:
        A callable taking no arguments and returning the ``parse_fasta``
        result.
    """
    return lambda: parse_fasta(
        base / "sabdab_full" / "vh.fasta"
    )
def sabdab_vh_generator():
    """Yield the ``(key, value)`` items of the parsed ``sabdab_full/vh.fasta``.

    ``parse_fasta`` is run on the file and the ``.items()`` of its result are
    yielded directly, without copying them into a temporary list first; the
    yielded pairs and their order are the same.

    Yields:
        Each ``(key, value)`` pair of the ``parse_fasta`` result.
    """
    yield from parse_fasta(base / "sabdab_full" / "vh.fasta").items()