|
| 1 | +import os |
| 2 | +import pytest |
| 3 | + |
| 4 | +from tests.util import s3 |
| 5 | +import toolchest_client as toolchest |
| 6 | + |
# Register the API key with the client only when one is supplied through the
# TOOLCHEST_API_KEY environment variable; otherwise leave the client untouched.
toolchest_api_key = os.getenv("TOOLCHEST_API_KEY")
if toolchest_api_key:
    toolchest.set_key(toolchest_api_key)
| 10 | + |
| 11 | + |
@pytest.mark.integration
def test_star_grch38():
    """
    Runs STAR on the small_star_500k.fastq input against the GRCh38 database
    and checks the size of the resulting Aligned.out.sam.
    """
    run_name = "test_star_grch38"
    fastq_path = "./small_star.fastq"
    results_dir = f"./{run_name}/"
    sam_path = f"{results_dir}Aligned.out.sam"

    # Make sure the local results directory exists before anything is written to it.
    os.makedirs(f"./{run_name}", exist_ok=True)

    # Fetch the test input to the local working directory.
    s3.download_integration_test_input(
        s3_file_key="small_star_500k.fastq",
        output_file_path=fastq_path,
    )

    toolchest.STAR(
        read_one=fastq_path,
        output_path=results_dir,
        database_name="GRCh38",
    )

    # STAR is non-deterministic, so only require the output's byte count to
    # fall inside the accepted range instead of comparing contents.
    sam_size = os.path.getsize(sam_path)
    assert 185952744 <= sam_size <= 185952766
| 36 | + |
| 37 | + |
@pytest.mark.integration
def test_star_grch38_parallel():
    """
    Runs STAR in parallel mode on the large_star_15GB.fastq input against the
    GRCh38 database and checks the size of the resulting Aligned.out.sam.
    """
    run_name = "test_star_grch38_parallel"
    fastq_path = "./large_star.fastq"
    results_dir = f"./{run_name}/"
    sam_path = f"{results_dir}Aligned.out.sam"

    # Make sure the local results directory exists before anything is written to it.
    os.makedirs(f"./{run_name}", exist_ok=True)

    # Fetch the test input to the local working directory.
    s3.download_integration_test_input(
        s3_file_key="large_star_15GB.fastq",
        output_file_path=fastq_path,
    )

    # NOTE(review): output_path here is the SAM *file* path, whereas the other
    # STAR tests in this module pass the output directory. Confirm this is what
    # parallel mode expects.
    toolchest.STAR(
        read_one=fastq_path,
        output_path=sam_path,
        database_name="GRCh38",
        parallelize=True,
    )

    # STAR is non-deterministic, so only require the output's byte count to
    # fall inside the accepted range instead of comparing contents.
    sam_size = os.path.getsize(sam_path)
    assert 33292990718 <= sam_size <= 33292994718
| 63 | + |
| 64 | + |
@pytest.mark.integration
def test_star_grch38_dangerous_arg():
    """
    Tests STAR against the grch38 database, with a dangerous arg (changing functionality)

    The run requests unsorted BAM output via ``--outSAMtype BAM Unsorted`` and
    also passes ``parallelize=True``. The test then checks the size of
    Aligned.out.bam and that the non-parallel STAR output files are present.
    """
    # Use a directory dedicated to this test. Sharing "test_star_grch38" with
    # test_star_grch38 would let the file-existence checks below pass on
    # leftovers from that test instead of on this run's output.
    test_dir = "test_star_grch38_dangerous_arg"
    os.makedirs(f"./{test_dir}", exist_ok=True)
    input_file_path = "./small_star.fastq"
    output_dir_path = f"./{test_dir}/"
    output_file_path = f"{output_dir_path}Aligned.out.bam"

    s3.download_integration_test_input(
        s3_file_key="small_star_500k.fastq",
        output_file_path=input_file_path,
    )

    toolchest.STAR(
        read_one=input_file_path,
        output_path=output_dir_path,
        database_name="GRCh38",
        tool_args="--outSAMtype BAM Unsorted",
        parallelize=True,  # this should be deliberately ignored
    )

    # Because STAR is non-deterministic and BAMs are compressed, verify that
    # the number of bytes is in range
    assert 38236020 <= os.path.getsize(output_file_path) <= 38236030

    # Make sure all non-parallel files exist as well
    for expected_name in ("Log.final.out", "Log.out", "Log.progress.out", "SJ.out.tab"):
        assert os.path.isfile(f"{output_dir_path}{expected_name}")
0 commit comments