Skip to content

Commit 2ce77f4

Browse files
authored
v0.7.39 (#62)
Adds: - Ability to pass S3 URIs as inputs (#49) - Modified handling of arguments + raw execution mode + more STAR arguments (#57) - Multipart uploads / downloads and the ability to increase non-parallel input file sizes (#60, #61, #63, #65) - Enable output file `.tar.gz`s across the board (#68) - Add an explicit `parallelize=True` flag (#68) Modifies: - Various fixes (#49, #56, #58, #64, #67) Integration tests: passing Unit tests: passing Manual tests: passing
1 parent 4f50414 commit 2ce77f4

30 files changed

+862
-525
lines changed

docs/index.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ Tools
2929
Toolchest currently supports the following tools:
3030

3131
* Bowtie2 (`bowtie2`)
32-
* Cutadapt (`cutadapt`)
3332
* Kraken2 (`kraken2`)
3433
* STAR (`STAR`)
3534
* Unicycler (`unicycler`)

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "toolchest-client"
3-
version = "0.7.30"
3+
version = "0.7.39"
44
description = "Python client for Toolchest"
55
authors = [
66
"Bryce Cai <bcai@trytoolchest.com>",
@@ -23,6 +23,7 @@ packages = [
2323
"Bug Tracker" = "https://github.com/trytoolchest/toolchest-client-python/issues"
2424

2525
[tool.poetry.dependencies]
26+
boto3 = "^1.18.29"
2627
python = "^3.6"
2728
requests = "^2.25.1"
2829
python-dotenv = "^0.18.0"
@@ -31,7 +32,6 @@ importlib-metadata = { version = "~=1.0", python = "<3.8" }
3132

3233
[tool.poetry.dev-dependencies]
3334
pytest = "^6.2.4"
34-
boto3 = "^1.18.29"
3535

3636
[[tool.poetry.source]]
3737
name = "pypi-public"

tests/test_kraken2.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,26 @@ def test_kraken2_paired_end():
6262

6363
# Kraken 2 paired-end is not completely deterministic, and consistently alternates between these two hashes
6464
assert hash.unordered(output_file_path) in [1076645572, 1174140935]
65+
66+
@pytest.mark.integration
def test_kraken2_s3():
    """
    Tests Kraken 2 with an example input in S3 against the std (v1) DB

    The input is handed to Toolchest as an S3 URI, so no local copy of the
    input file is downloaded before the run.
    """
    # Use a directory dedicated to this test so that an output file left in a
    # directory shared with another test cannot satisfy the hash assertion.
    test_dir = "test_kraken2_s3"
    os.makedirs(f"./{test_dir}", exist_ok=True)
    output_dir_path = f"./{test_dir}/"
    output_file_path = f"{output_dir_path}kraken2_output.txt"

    toolchest.kraken2(
        inputs="s3://toolchest-integration-tests-public/synthetic_bacteroides_reads.fasta",
        output_path=output_dir_path,
    )

    assert hash.unordered(output_file_path) == 886254946

tests/test_shogun.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ def test_shogun_filter_and_align():
1515
Tests shogun (filter and align for simplicity) with a single R1 input
1616
"""
1717

18-
test_dir = "test_shogun_filter_and_align"
19-
os.makedirs(f"./{test_dir}", exist_ok=True)
18+
test_dir = "./test_shogun_filter_and_align"
19+
os.makedirs(f"{test_dir}", exist_ok=True)
2020
input_file_path = f"./{test_dir}/combined_seqs_unfiltered.fna"
21-
output_file_path_filter = f"./{test_dir}/combined_seqs_filtered.fna"
22-
output_file_path_align = f"./{test_dir}/burst_output.b6"
21+
output_file_path_filter = f"./{test_dir}/combined_seqs.filtered.fna"
22+
output_file_path_align = f"./{test_dir}/alignment.burst.b6"
2323

2424
s3.download_integration_test_input(
2525
s3_file_key="combined_seqs_unfiltered.fna",
@@ -29,15 +29,15 @@ def test_shogun_filter_and_align():
2929
toolchest.shogun_filter(
3030
tool_args="--alignment True",
3131
inputs=input_file_path,
32-
output_path=output_file_path_filter,
32+
output_path=test_dir,
3333
)
3434

3535
assert hash.unordered(output_file_path_filter) == 510167908
3636

3737
toolchest.shogun_align(
3838
tool_args="",
3939
inputs=output_file_path_filter,
40-
output_path=output_file_path_align,
40+
output_path=test_dir,
4141
)
4242

4343
assert hash.unordered(output_file_path_align) == 780853697

tests/test_star.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import os
2+
import pytest
3+
4+
from tests.util import s3
5+
import toolchest_client as toolchest
6+
7+
# Register the API key with the client only when one is set in the environment.
toolchest_api_key = os.getenv("TOOLCHEST_API_KEY")
if toolchest_api_key:
    toolchest.set_key(toolchest_api_key)
10+
11+
12+
@pytest.mark.integration
def test_star_grch38():
    """
    Runs STAR on a small FASTQ input against the GRCh38 database and checks
    the size of the resulting SAM file.
    """
    work_dir = "./test_star_grch38"
    os.makedirs(work_dir, exist_ok=True)

    fastq_path = "./small_star.fastq"
    results_dir = f"{work_dir}/"
    sam_path = results_dir + "Aligned.out.sam"

    s3.download_integration_test_input(
        s3_file_key="small_star_500k.fastq",
        output_file_path=fastq_path,
    )

    toolchest.STAR(
        read_one=fastq_path,
        output_path=results_dir,
        database_name="GRCh38",
    )

    # STAR is non-deterministic, so the output is checked by size range
    # rather than by an exact hash.
    min_bytes, max_bytes = 185952744, 185952766
    assert min_bytes <= os.path.getsize(sam_path) <= max_bytes
36+
37+
38+
@pytest.mark.integration
def test_star_grch38_parallel():
    """
    Runs STAR on a large FASTQ input against the GRCh38 database with
    parallel mode requested, and checks the size of the resulting SAM file.
    """
    work_dir = "./test_star_grch38_parallel"
    os.makedirs(work_dir, exist_ok=True)

    fastq_path = "./large_star.fastq"
    sam_path = f"{work_dir}/" + "Aligned.out.sam"

    s3.download_integration_test_input(
        s3_file_key="large_star_15GB.fastq",
        output_file_path=fastq_path,
    )

    # NOTE(review): unlike the other STAR tests, output_path here is the SAM
    # file path rather than the output directory -- confirm this is intended.
    toolchest.STAR(
        read_one=fastq_path,
        output_path=sam_path,
        database_name="GRCh38",
        parallelize=True,
    )

    # STAR is non-deterministic, so the output is checked by size range
    # rather than by an exact hash.
    min_bytes, max_bytes = 33292990718, 33292994718
    assert min_bytes <= os.path.getsize(sam_path) <= max_bytes
63+
64+
65+
@pytest.mark.integration
def test_star_grch38_dangerous_arg():
    """
    Tests STAR against the grch38 database, with a dangerous arg (changing functionality)

    The run requests unsorted BAM output via ``tool_args`` and also passes
    ``parallelize=True``. The test then checks the BAM size and that the
    additional STAR output files are present in the output directory.
    """
    # Use a directory dedicated to this test: sharing "test_star_grch38" with
    # test_star_grch38 would let files left by that test satisfy the
    # existence checks below.
    test_dir = "test_star_grch38_dangerous_arg"
    os.makedirs(f"./{test_dir}", exist_ok=True)
    input_file_path = "./small_star.fastq"
    output_dir_path = f"./{test_dir}/"
    output_file_path = f"{output_dir_path}Aligned.out.bam"

    s3.download_integration_test_input(
        s3_file_key="small_star_500k.fastq",
        output_file_path=input_file_path,
    )

    toolchest.STAR(
        read_one=input_file_path,
        output_path=output_dir_path,
        database_name="GRCh38",
        tool_args="--outSAMtype BAM Unsorted",
        parallelize=True,  # this should be deliberately ignored
    )

    # Because STAR is non-deterministic and BAMs are compressed, verify that
    # the number of bytes is in range
    assert 38236020 <= os.path.getsize(output_file_path) <= 38236030

    # Make sure all non-parallel files exist as well
    expected_files = ("Log.final.out", "Log.out", "Log.progress.out", "SJ.out.tab")
    for file_name in expected_files:
        assert os.path.isfile(f"{output_dir_path}{file_name}")

toolchest_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@
2121
from toolchest_client.api.auth import get_key, set_key
2222
from toolchest_client.api.exceptions import ToolchestException, DataLimitError, ToolchestJobError
2323
from toolchest_client.api.query import Query
24-
from .tools.api import bowtie2, cellranger_mkfastq, cutadapt, kraken2, shi7, shogun_align, shogun_filter, STAR, test, unicycler
24+
from .tools.api import bowtie2, cellranger_mkfastq, kraken2, shi7, shogun_align, shogun_filter, STAR, test, unicycler

toolchest_client/api/exceptions.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ class ToolchestException(OSError):
1515
class ToolchestKeyError(ToolchestException):
1616
"""Invalid Toolchest auth key."""
1717

18+
class ToolchestS3AccessError(ToolchestException):
    """Toolchest cannot access the given S3 input."""
1820

1921
class DataLimitError(ToolchestException):
2022
"""Data limit for Toolchest exceeded."""

0 commit comments

Comments
 (0)