1 change: 0 additions & 1 deletion docs/index.rst
@@ -29,7 +29,6 @@ Tools
Toolchest currently supports the following tools:

* Bowtie2 (`bowtie2`)
-* Cutadapt (`cutadapt`)
* Kraken2 (`kraken2`)
* STAR (`STAR`)
* Unicycler (`unicycler`)
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "toolchest-client"
version = "0.7.30"
version = "0.7.39"
description = "Python client for Toolchest"
authors = [
"Bryce Cai <bcai@trytoolchest.com>",
@@ -23,6 +23,7 @@ packages = [
"Bug Tracker" = "https://github.com/trytoolchest/toolchest-client-python/issues"

[tool.poetry.dependencies]
+boto3 = "^1.18.29"
python = "^3.6"
requests = "^2.25.1"
python-dotenv = "^0.18.0"
@@ -31,7 +32,6 @@ importlib-metadata = { version = "~=1.0", python = "<3.8" }

[tool.poetry.dev-dependencies]
pytest = "^6.2.4"
-boto3 = "^1.18.29"

[[tool.poetry.source]]
name = "pypi-public"
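Moving boto3 from dev-dependencies to runtime dependencies lines up with the new S3 input support exercised in tests/test_kraken2.py below, presumably because the client now touches S3 at runtime. A minimal sketch of that usage, assuming a valid TOOLCHEST_API_KEY is configured; the S3 object is the public integration-test input used in the new test, and the output directory is illustrative:

```python
import toolchest_client as toolchest

# Pass an S3 URI directly as the input; results are written to the local output path.
toolchest.kraken2(
    inputs="s3://toolchest-integration-tests-public/synthetic_bacteroides_reads.fasta",
    output_path="./kraken2_output/",  # hypothetical local output directory
)
```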
23 changes: 23 additions & 0 deletions tests/test_kraken2.py
@@ -62,3 +62,26 @@ def test_kraken2_paired_end():

    # Kraken 2 paired-end is not completely deterministic, and consistently alternates between these two hashes
    assert hash.unordered(output_file_path) in [1076645572, 1174140935]

@pytest.mark.integration
def test_kraken2_s3():
    """
    Tests Kraken 2 with an example input in S3 against the std (v1) DB
    """
    test_dir = "test_kraken2_standard"
    os.makedirs(f"./{test_dir}", exist_ok=True)
    input_file_path = "./kraken_input.fasta"
    output_dir_path = f"./{test_dir}/"
    output_file_path = f"{output_dir_path}kraken2_output.txt"

    s3.download_integration_test_input(
        s3_file_key="synthetic_bacteroides_reads.fasta",
        output_file_path=input_file_path,
    )

    toolchest.kraken2(
        inputs="s3://toolchest-integration-tests-public/synthetic_bacteroides_reads.fasta",
        output_path=output_dir_path,
    )

    assert hash.unordered(output_file_path) == 886254946
12 changes: 6 additions & 6 deletions tests/test_shogun.py
@@ -15,11 +15,11 @@ def test_shogun_filter_and_align():
    Tests shogun (filter and align for simplicity) with a single R1 input
    """

test_dir = "test_shogun_filter_and_align"
os.makedirs(f"./{test_dir}", exist_ok=True)
test_dir = "./test_shogun_filter_and_align"
os.makedirs(f"{test_dir}", exist_ok=True)
input_file_path = f"./{test_dir}/combined_seqs_unfiltered.fna"
output_file_path_filter = f"./{test_dir}/combined_seqs_filtered.fna"
output_file_path_align = f"./{test_dir}/burst_output.b6"
output_file_path_filter = f"./{test_dir}/combined_seqs.filtered.fna"
output_file_path_align = f"./{test_dir}/alignment.burst.b6"

    s3.download_integration_test_input(
        s3_file_key="combined_seqs_unfiltered.fna",
@@ -29,15 +29,15 @@ def test_shogun_filter_and_align():
    toolchest.shogun_filter(
        tool_args="--alignment True",
        inputs=input_file_path,
-        output_path=output_file_path_filter,
+        output_path=test_dir,
    )

    assert hash.unordered(output_file_path_filter) == 510167908

    toolchest.shogun_align(
        tool_args="",
        inputs=output_file_path_filter,
-        output_path=output_file_path_align,
+        output_path=test_dir,
    )

    assert hash.unordered(output_file_path_align) == 780853697
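With this change, shogun_filter and shogun_align receive a directory as output_path instead of an explicit file path; the updated test expects the fixed file names combined_seqs.filtered.fna and alignment.burst.b6 inside that directory. A minimal sketch of the new calling pattern, assuming a local unfiltered FASTA input and an illustrative output directory:

```python
import toolchest_client as toolchest

out_dir = "./shogun_out"  # hypothetical output directory

# Filter step: writes combined_seqs.filtered.fna into out_dir (per the updated test).
toolchest.shogun_filter(
    tool_args="--alignment True",
    inputs="./combined_seqs_unfiltered.fna",
    output_path=out_dir,
)

# Align step: consumes the filtered file and writes alignment.burst.b6 into out_dir.
toolchest.shogun_align(
    tool_args="",
    inputs=f"{out_dir}/combined_seqs.filtered.fna",
    output_path=out_dir,
)
```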
96 changes: 96 additions & 0 deletions tests/test_star.py
@@ -0,0 +1,96 @@
import os
import pytest

from tests.util import s3
import toolchest_client as toolchest

toolchest_api_key = os.environ.get("TOOLCHEST_API_KEY")
if toolchest_api_key:
    toolchest.set_key(toolchest_api_key)


@pytest.mark.integration
def test_star_grch38():
"""
Tests STAR against the grch38 database
"""
test_dir = "test_star_grch38"
os.makedirs(f"./{test_dir}", exist_ok=True)
input_file_path = "./small_star.fastq"
output_dir_path = f"./{test_dir}/"
output_file_path = f"{output_dir_path}Aligned.out.sam"

s3.download_integration_test_input(
s3_file_key="small_star_500k.fastq",
output_file_path=input_file_path,
)

toolchest.STAR(
read_one=input_file_path,
output_path=output_dir_path,
database_name="GRCh38",
)

# Because STAR is non-deterministic, verify that the number of bytes is in range
assert 185952744 <= os.path.getsize(output_file_path) <= 185952766


@pytest.mark.integration
def test_star_grch38_parallel():
"""
Tests STAR against the grch38 database, using parallel mode
"""
test_dir = "test_star_grch38_parallel"
os.makedirs(f"./{test_dir}", exist_ok=True)
input_file_path = "./large_star.fastq"
output_dir_path = f"./{test_dir}/"
output_file_path = f"{output_dir_path}Aligned.out.sam"

s3.download_integration_test_input(
s3_file_key="large_star_15GB.fastq",
output_file_path=input_file_path,
)

toolchest.STAR(
read_one=input_file_path,
output_path=output_file_path,
database_name="GRCh38",
parallelize=True,
)

# Because STAR is non-deterministic, verify that the number of bytes is in range
assert 33292990718 <= os.path.getsize(output_file_path) <= 33292994718


@pytest.mark.integration
def test_star_grch38_dangerous_arg():
"""
Tests STAR against the grch38 database, with a dangerous arg (changing functionality)
"""
test_dir = "test_star_grch38"
os.makedirs(f"./{test_dir}", exist_ok=True)
input_file_path = "./small_star.fastq"
output_dir_path = f"./{test_dir}/"
output_file_path = f"{output_dir_path}Aligned.out.bam"

s3.download_integration_test_input(
s3_file_key="small_star_500k.fastq",
output_file_path=input_file_path,
)

toolchest.STAR(
read_one=input_file_path,
output_path=output_dir_path,
database_name="GRCh38",
tool_args="--outSAMtype BAM Unsorted",
parallelize=True, # this should be deliberately ignored
)

# Because STAR is non-deterministic and BAMs are are compressed verify that the number of bytes is in range
assert 38236020 <= os.path.getsize(output_file_path) <= 38236030

# Make sure all non-parallel files exist as well
assert os.path.isfile(f"{output_dir_path}Log.final.out")
assert os.path.isfile(f"{output_dir_path}Log.out")
assert os.path.isfile(f"{output_dir_path}Log.progress.out")
assert os.path.isfile(f"{output_dir_path}SJ.out.tab")
2 changes: 1 addition & 1 deletion toolchest_client/__init__.py
@@ -21,4 +21,4 @@
from toolchest_client.api.auth import get_key, set_key
from toolchest_client.api.exceptions import ToolchestException, DataLimitError, ToolchestJobError
from toolchest_client.api.query import Query
-from .tools.api import bowtie2, cellranger_mkfastq, cutadapt, kraken2, shi7, shogun_align, shogun_filter, STAR, test, unicycler
+from .tools.api import bowtie2, cellranger_mkfastq, kraken2, shi7, shogun_align, shogun_filter, STAR, test, unicycler
2 changes: 2 additions & 0 deletions toolchest_client/api/exceptions.py
@@ -15,6 +15,8 @@ class ToolchestException(OSError):
class ToolchestKeyError(ToolchestException):
    """Invalid Toolchest auth key."""

+class ToolchestS3AccessError(ToolchestException):
+    """S3 input cannot be accessed by Toolchest."""

class DataLimitError(ToolchestException):
    """Data limit for Toolchest exceeded."""