Skip to content

Commit a880da0

Browse files
authored
Merge pull request #134 from trytoolchest/staging
2 parents c11274b + 033b01e commit a880da0

File tree

10 files changed

+28
-23
lines changed

10 files changed

+28
-23
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "toolchest-client"
3-
version = "0.9.0"
3+
version = "0.9.1"
44
description = "Python client for Toolchest"
55
authors = [
66
"Bryce Cai <bcai@trytoolchest.com>",

tests/test_cellranger.py

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,10 @@ def test_cellranger_count_s3_inputs():
1919
output = toolchest.cellranger_count(
2020
inputs="s3://toolchest-integration-tests/cellranger/count/pbmc_1k_v3_fastqs_trimmed.tar.gz",
2121
database_name="GRCh38",
22+
output_path=output_dir_path,
23+
skip_decompression=True,
2224
)
23-
verify_cellranger_count_outputs(output, output_dir_path)
25+
verify_cellranger_count_outputs(output.output_path, output_dir_path)
2426

2527

2628
@pytest.mark.integration
@@ -40,11 +42,13 @@ def test_cellranger_count_local_inputs():
4042
output = toolchest.cellranger_count(
4143
inputs=input_dir_path,
4244
database_name="GRCh38",
45+
output_path=output_dir_path,
46+
skip_decompression=True,
4347
)
44-
verify_cellranger_count_outputs(output, output_dir_path)
48+
verify_cellranger_count_outputs(output.output_path, output_dir_path)
4549

4650

47-
def verify_cellranger_count_outputs(output, output_dir_path):
51+
def verify_cellranger_count_outputs(archive_path, output_dir_path):
4852
# Expected properties of outputs
4953
MIN_EXPECTED_ARCHIVE_SIZE = 34000000
5054
MAX_EXPECTED_ARCHIVE_SIZE = 38000000
@@ -54,12 +58,6 @@ def verify_cellranger_count_outputs(output, output_dir_path):
5458
EXPECTED_FILTERED_MATRIX_SIZE = 503956
5559

5660
# Verify properties of packed archive
57-
archive_path = f"{output_dir_path}output.tar.gz"
58-
toolchest.download(
59-
output_path=output_dir_path,
60-
s3_uri=output.s3_uri,
61-
skip_decompression=True,
62-
)
6361
archive_size = os.path.getsize(archive_path)
6462
assert MIN_EXPECTED_ARCHIVE_SIZE <= archive_size <= MAX_EXPECTED_ARCHIVE_SIZE
6563

tests/test_chaining.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
toolchest.set_key(toolchest_api_key)
1010

1111
SHI7_SINGLE_END_HASH = 1570879637
12-
SHOGUN_CHAINED_HASH = 33856653
12+
SHOGUN_CHAINED_HASH = 1708070294
1313

1414

1515
@pytest.mark.integration
@@ -29,7 +29,7 @@ def test_shi7_shogun_chaining():
2929
test_dir = "test_shi7_shogun_chaining"
3030
os.makedirs(f"./{test_dir}", exist_ok=True)
3131
output_dir_path = f"./{test_dir}/"
32-
output_file_path_shogun = f"{output_dir_path}alignment.burst.b6"
32+
output_file_path_shogun = f"{output_dir_path}alignment.bowtie2.sam"
3333

3434
output_shi7 = toolchest.shi7(
3535
tool_args="-SE",

tests/test_shogun.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def test_shogun_filter_and_align():
1919
os.makedirs(f"{test_dir}", exist_ok=True)
2020
input_file_path = f"./{test_dir}/combined_seqs_unfiltered.fna"
2121
output_file_path_filter = f"./{test_dir}/combined_seqs.filtered.fna"
22-
output_file_path_align = f"./{test_dir}/alignment.burst.b6"
22+
output_file_path_align = f"./{test_dir}/alignment.bowtie2.sam"
2323

2424
s3.download_integration_test_input(
2525
s3_file_key="combined_seqs_unfiltered.fna",
@@ -40,5 +40,4 @@ def test_shogun_filter_and_align():
4040
inputs=output_file_path_filter,
4141
output_path=test_dir,
4242
)
43-
44-
assert hash.unordered(output_file_path_align) == 780853697
43+
assert hash.unordered(output_file_path_align) == 1952162202

toolchest_client/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from dotenv import load_dotenv, find_dotenv
55
import functools
66
import sentry_sdk
7+
import os
78

89
# set __version__ module
910
try:
@@ -34,5 +35,6 @@
3435
sentry_sdk.init(
3536
"https://c7db7e7a4ac349cc974c55f1fcb7d2f7@o1171636.ingest.sentry.io/6271973",
3637

37-
traces_sample_rate=1.0
38+
traces_sample_rate=1.0,
39+
environment=os.getenv("DEPLOY_ENVIRONMENT", 'production')
3840
)

toolchest_client/api/output.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,11 +42,12 @@ def set_s3_uri(self, s3_uri):
4242
def set_output_path(self, output_path):
4343
self.output_path = output_path
4444

45-
def download(self, output_dir):
45+
def download(self, output_dir, skip_decompression=False):
4646
self.output_path = download(
4747
output_path=output_dir,
4848
s3_uri=self.s3_uri,
4949
run_id=self.run_id,
50+
skip_decompression=skip_decompression,
5051
)
5152
return self.output_path
5253

toolchest_client/api/query.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ def __init__(self, stored_output=None, is_async=False, pipeline_segment_instance
6565
def run_query(self, tool_name, tool_version, input_prefix_mapping,
6666
output_type, tool_args=None, database_name=None, database_version=None,
6767
custom_database_path=None, output_name="output", output_primary_name=None,
68-
input_files=None, output_path=None, thread_statuses=None):
68+
input_files=None, output_path=None, skip_decompression=False, thread_statuses=None):
6969
"""Executes a query to the Toolchest API.
7070
7171
:param tool_name: Tool to be used.
@@ -80,6 +80,7 @@ def run_query(self, tool_name, tool_version, input_prefix_mapping,
8080
:param input_files: List of paths to be passed in as input.
8181
:param output_path: Path (client-side) where the output file will be downloaded.
8282
:param output_type: Type (e.g. GZ_TAR) of the output file
83+
:param skip_decompression: Whether to skip decompression of the output file, if it is an archive
8384
:param thread_statuses: Statuses of all threads, shared between threads.
8485
"""
8586
self.thread_name = threading.current_thread().getName()
@@ -129,7 +130,7 @@ def run_query(self, tool_name, tool_version, input_prefix_mapping,
129130

130131
self._wait_for_job()
131132

132-
self._download(output_path, output_type)
133+
self._download(output_path, output_type, skip_decompression)
133134

134135
self.mark_as_failed = False
135136
self._update_status(Status.COMPLETE)
@@ -361,7 +362,7 @@ def _wait_for_job(self):
361362
leftover_delay = elapsed_time % self.WAIT_FOR_JOB_DELAY
362363
time.sleep(leftover_delay)
363364

364-
def _download(self, output_path, output_type):
365+
def _download(self, output_path, output_type, skip_decompression):
365366
"""Retrieves information needed for downloading. If ``output_path`` is given,
366367
downloads output to ``output_path`` and decompresses output archive, if necessary.
367368
"""
@@ -375,6 +376,7 @@ def _download(self, output_path, output_type):
375376
output_path=output_path,
376377
output_file_keys=output_file_keys,
377378
output_type=output_type,
379+
skip_decompression=skip_decompression,
378380
)
379381
self._update_status(Status.TRANSFERRED_TO_CLIENT)
380382
except ToolchestDownloadError as err:

toolchest_client/files/http.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
Functions for handling files given by HTTP / HTTPS URLs.
66
"""
77
from urllib.parse import urlparse
8+
from urllib3.exceptions import LocationParseError
89

910
import requests
1011
from requests.exceptions import HTTPError, InvalidURL, InvalidSchema
@@ -28,7 +29,7 @@ def path_is_http_url(path):
2829
"""
2930
try:
3031
get_http_url_file_size(get_url_with_protocol(path))
31-
except (InvalidURL, HTTPError, InvalidSchema):
32+
except (InvalidURL, HTTPError, InvalidSchema, LocationParseError):
3233
return False
3334

3435
return True

toolchest_client/tools/shogun.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def __init__(self, tool_args, output_name, inputs, output_path,
2828
parallel_enabled=False,
2929
output_type=OutputType.GZ_TAR,
3030
output_is_directory=True,
31-
output_names=["alignment.burst.b6"],
31+
output_names=["alignment.bowtie2.sam"],
3232
**kwargs,
3333
)
3434

toolchest_client/tools/tool.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def __init__(self, tool_name, tool_version, tool_args, output_name,
3737
max_input_bytes_per_file_parallel=FOUR_POINT_FIVE_GIGABYTES,
3838
group_paired_ends=False, compress_inputs=False,
3939
output_type=OutputType.FLAT_TEXT, output_is_directory=True,
40-
output_names=None, is_async=False):
40+
output_names=None, is_async=False, skip_decompression=False):
4141
self.tool_name = tool_name
4242
self.tool_version = tool_version
4343
self.tool_args = tool_args
@@ -76,6 +76,7 @@ def __init__(self, tool_name, tool_version, tool_args, output_name,
7676
self.thread_outputs = {}
7777
self.output_names = output_names or []
7878
self.is_async = is_async
79+
self.skip_decompression = skip_decompression
7980
signal.signal(signal.SIGTERM, self._handle_termination)
8081
signal.signal(signal.SIGINT, self._handle_termination)
8182

@@ -435,6 +436,7 @@ def run(self):
435436
"input_prefix_mapping": self.input_prefix_mapping,
436437
"output_path": temp_parallel_output_file_path if should_run_in_parallel else non_parallel_output_path,
437438
"output_type": self.output_type,
439+
"skip_decompression": self.skip_decompression,
438440
})
439441

440442
# Add non-distinct dictionary for status updates

0 commit comments

Comments
 (0)