-
Notifications
You must be signed in to change notification settings - Fork 50
/
test_arcas_hla.py
80 lines (67 loc) · 2.76 KB
/
test_arcas_hla.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Test partial and whole HLA typing pipelines
import pytest
import subprocess
import json
from os.path import dirname, abspath
# Directory one level above the folder containing this test file; the
# arcasHLA executable and the expected output JSON files are located
# relative to it.
ROOT_DIR = dirname(dirname(abspath(__file__)))
@pytest.fixture(scope="session", autouse=True)
def set_reference_version():
    """
    Fetch IMGT/HLA database version 3.24.0 before the test suite runs.

    Invokes ``arcasHLA reference --version 3.24.0`` once per test session,
    using the executable located at ``ROOT_DIR``.

    Raises:
        subprocess.CalledProcessError: if the command exits with a
            non-zero status.
    """
    # Build argv as a list instead of splitting a formatted string, so a
    # ROOT_DIR containing whitespace is not broken into several arguments.
    reference_cmd = [f"{ROOT_DIR}/arcasHLA", "reference", "--version", "3.24.0"]
    # check=True surfaces a failed fetch here instead of letting the typing
    # tests continue after the reference command has failed.
    subprocess.run(reference_cmd, check=True)
@pytest.fixture(scope="session", autouse=True)
def extract_reads():
    """
    Extract reads from ``test/test.bam`` before the typing tests run.

    Invokes ``arcasHLA extract`` once per test session and writes its
    output to ``test/output``.

    Raises:
        subprocess.CalledProcessError: if the command exits with a
            non-zero status.
    """
    # Build argv as a list instead of splitting a formatted string, so a
    # ROOT_DIR containing whitespace is not broken into several arguments.
    extract_cmd = [
        f"{ROOT_DIR}/arcasHLA",
        "extract",
        "test/test.bam",
        "-o", "test/output",
        "-t", "8",
        "-v",
    ]
    # cwd=ROOT_DIR makes the relative test/ paths resolve against the same
    # directory the tests read their results from, whatever directory
    # pytest was launched in. check=True makes a failed extraction an
    # immediate error rather than a silent no-op.
    subprocess.run(extract_cmd, cwd=ROOT_DIR, check=True)
def test_whole_allele_typing():
    """
    Run ``arcasHLA genotype`` on the extracted reads and check the result.

    The command writes to ``test/output``; the resulting
    ``test.genotype.json`` is loaded and compared, gene by gene, with the
    expected allele pairs. Allele order within a gene is ignored.

    Raises:
        subprocess.CalledProcessError: if the genotype command exits with a
            non-zero status.
    """
    # Build argv as a list instead of splitting a formatted string, so a
    # ROOT_DIR containing whitespace is not broken into several arguments.
    whole_typing_cmd = [
        f"{ROOT_DIR}/arcasHLA",
        "genotype",
        "test/output/test.extracted.1.fq.gz",
        "test/output/test.extracted.2.fq.gz",
        "-g", "A,B,C,DPB1,DQB1,DQA1,DRB1",
        "-o", "test/output",
        "-t", "8",
        "-v",
    ]
    # cwd=ROOT_DIR keeps the relative paths consistent with output_file
    # below; check=True prevents a failed run from being masked by a stale
    # JSON file left over from an earlier run.
    subprocess.run(whole_typing_cmd, cwd=ROOT_DIR, check=True)

    output_file = f"{ROOT_DIR}/test/output/test.genotype.json"
    expected_output = {
        "A": ["A*01:01:01", "A*03:01:01"],
        "B": ["B*39:01:01", "B*07:02:01"],
        "C": ["C*08:01:01", "C*01:02:01"],
        "DPB1": ["DPB1*14:01:01", "DPB1*02:01:02"],
        "DQA1": ["DQA1*02:01:01", "DQA1*05:03"],
        "DQB1": ["DQB1*02:02:01", "DQB1*06:09:01"],
        "DRB1": ["DRB1*10:01:01", "DRB1*14:02:01"],
    }

    with open(output_file, "r") as f:
        output = json.load(f)

    # Order of alleles does not matter for the test, so compare as sets.
    # Each mapping is converted on its own: an unexpected or missing gene in
    # the output then fails the assertion instead of raising KeyError or
    # comparing a list against a set.
    output_sets = {gene: set(alleles) for gene, alleles in output.items()}
    expected_sets = {
        gene: set(alleles) for gene, alleles in expected_output.items()
    }
    assert output_sets == expected_sets
def test_partial_allele_typing():
    """
    Run ``arcasHLA partial`` on the extracted reads and check the result.

    The command is given ``test/output/test.genotype.json`` via ``-G`` and
    writes to ``test/output``; the resulting ``test.partial_genotype.json``
    is loaded and compared, gene by gene, with the expected allele pairs.
    Allele order within a gene is ignored.

    Raises:
        subprocess.CalledProcessError: if the partial command exits with a
            non-zero status.
    """
    # Build argv as a list instead of splitting a formatted string, so a
    # ROOT_DIR containing whitespace is not broken into several arguments.
    partial_typing_cmd = [
        f"{ROOT_DIR}/arcasHLA",
        "partial",
        "test/output/test.extracted.1.fq.gz",
        "test/output/test.extracted.2.fq.gz",
        "-g", "A,B,C,DPB1,DQB1,DQA1,DRB1",
        "-G", "test/output/test.genotype.json",
        "-o", "test/output",
        "-t", "8",
        "-v",
    ]
    # cwd=ROOT_DIR keeps the relative paths consistent with output_file
    # below; check=True prevents a failed run from being masked by a stale
    # JSON file left over from an earlier run.
    subprocess.run(partial_typing_cmd, cwd=ROOT_DIR, check=True)

    output_file = f"{ROOT_DIR}/test/output/test.partial_genotype.json"
    expected_output = {
        "A": ["A*01:01:01", "A*03:01:01"],
        "B": ["B*07:02:01", "B*39:39:01"],
        "C": ["C*08:01:01", "C*01:02:01"],
        "DPB1": ["DPB1*14:01:01", "DPB1*02:01:02"],
        "DQA1": ["DQA1*02:01:01", "DQA1*05:03"],
        "DQB1": ["DQB1*06:04:01", "DQB1*02:02:01"],
        "DRB1": ["DRB1*03:02:01", "DRB1*14:02:01"],
    }

    with open(output_file, "r") as f:
        output = json.load(f)

    # Order of alleles does not matter for the test, so compare as sets.
    # Each mapping is converted on its own: an unexpected or missing gene in
    # the output then fails the assertion instead of raising KeyError or
    # comparing a list against a set.
    output_sets = {gene: set(alleles) for gene, alleles in output.items()}
    expected_sets = {
        gene: set(alleles) for gene, alleles in expected_output.items()
    }
    assert output_sets == expected_sets