Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a script that converts CellBrowser config to Anndata-Zarr file which is digestible by Vitessce zero config mode #259

Merged
merged 18 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ dependencies = [
'black>=21.11b1',
'numpy>=1.21.2',
'anndata>=0.7.8,<0.9',
'scanpy>=1.9.3',
'ome-zarr==0.2.1',
'tifffile>=2020.10.1',
'jsonschema>=3.2'
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is used for the validator of the CellBrowser config

]

[project.optional-dependencies]
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ ignore =
exclude =
./js/node_modules/,
./docs/notebooks/.ipynb_checkpoints/,
./build/
./build/,
./.ipynb_checkpoints/
Binary file added tests/data/smaller_expr_matrix.tsv.gz
ivababukova marked this conversation as resolved.
Show resolved Hide resolved
Binary file not shown.
Binary file added tests/data/test.coords.tsv.gz
Binary file not shown.
11 changes: 11 additions & 0 deletions tests/data/test_meta.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
cellId cluster age age_unit Key experiment_name fragAnalyzerRange nCells ng_ul plate_nr sample_recieve_date chip_type c1_chip_id enrichment_method capture_position gene_body_coverage intron_exon_ratio mapped_reads total_reads
X1000010011.A01 unsure 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A01 0.649710043 67.255908 686835 822828
X1000010011.A02 alpha 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A02 0.75127736 12.093265 905062 1269102
X1000010011.A03 unsure 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A03 0.674989037 64.684568 951841 1697880
X1000010011.A04 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A04 0.795234851 5.697826 981928 1576145
X1000010011.A05 delta 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A05 0.716792727 46.780233 793411 1477465
X1000010011.A06 delta 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A06 0.791774081 43.686701 1000518 1633234
X1000010011.A07 alpha 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A07 0.74331027 39.149919 1115820 1609070
X1000010011.A08 unsure 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A08 0.708191319 71.121361 1150879 1497557
X1000010011.A09 unsure 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A09 0.707141994 20.499992 1134953 1547980
X1000010011.A10 unsure 21 years NA panc_21y_141021 NA NA NA NA 141021 NA 1000010011 islet A10 0.727188243 86.215003 1091517 1491687
284 changes: 284 additions & 0 deletions tests/test_config_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
import pytest
from unittest.mock import patch, Mock
import os
from os.path import join
from copy import deepcopy

from vitessce import (CellBrowserToAnndataZarrConverter, write_to_AnndataZarr_store)
from vitessce.data_utils import (
VAR_CHUNK_SIZE
)

# Payload served by the mocked ``requests.get(...).json()`` in the tests below
# whenever the project's ``dataset.json`` is requested.
valid_cellbrowser_config = {
    "fileVersions": {
        "inMatrix": {
            "fname": "/hive/data/inside/cells/datasets/adultPancreas/exprMatrix.csv.gz",
            "md5": "8da7a759a8",
            "size": 23363664,
            "mtime": "2018-10-16 23:29:40",
        },
        "outMatrix": {
            "fname": "/usr/local/apache/htdocs-cells/adultPancreas/exprMatrix.tsv.gz",
            "md5": "934bbdeacd",
            "size": 22710325,
            "mtime": "2022-05-18 22:34:06",
        },
        "inMeta": {
            "fname": "/hive/data/inside/cells/datasets/adultPancreas/meta.tsv",
            "md5": "7699cf188d",
            "size": 527639,
            "mtime": "2019-02-26 16:08:50",
        },
        "outMeta": {
            "fname": "/usr/local/apache/htdocs-cells/adultPancreas/meta.tsv",
            "md5": "cdfeda9e0a",
            "size": 522326,
            "mtime": "2022-05-24 18:01:35",
        },
    },
    "coords": [
        {
            "name": "coords_0",
            "shortLabel": "t-SNE",
            "md5": "3ff37334ef",
            "minX": 0,
            "maxX": 65535,
            "minY": 0,
            "maxY": 65535,
            "type": "Uint16",
            "textFname": "test.coords.tsv.gz",
            "labelMd5": "d41d8cd98f",
        },
    ],
    # Cluster name -> marker gene names.
    "topMarkers": {
        "acinar": ["A1CF"],
        "alpha": ["A1BG-AS1"],
        "beta": ["LEPR"],
        "delta": ["SST", "RBP4"],
        "ductal": ["ANXA4"],
        "mesenchymal": ["SPARCL1"],
        "nan": ["ERCC-00092"],
        "unsure": ["G6PC2", "PCSK1"],
    },
}

# Only the "fileVersions" section, i.e. a payload without the top-level
# "coords" and "topMarkers" keys; test_end_to_end_invalid_config expects the
# converter to reject it with a ValueError.
invalid_cellbrowser_config = deepcopy(valid_cellbrowser_config["fileVersions"])

# Positional arguments handed to the converter / writer in every test.
project_name = "test-project"
output_dir = "test-output-dir"


@pytest.fixture
def mock_makedirs():
    """Replace ``os.makedirs`` with a mock while the test runs and yield that mock."""
    with patch('os.makedirs') as makedirs_mock:
        yield makedirs_mock


@pytest.fixture
def mock_write_zarr():
    """Replace ``anndata.AnnData.write_zarr`` with a mock while the test runs and yield that mock."""
    with patch('anndata.AnnData.write_zarr') as write_zarr_mock:
        yield write_zarr_mock


@pytest.fixture
def mock_filter_cells():
    """Replace ``scanpy.pp.filter_cells`` with a mock while the test runs and yield that mock."""
    with patch('scanpy.pp.filter_cells') as filter_cells_mock:
        yield filter_cells_mock


@pytest.fixture
def mock_end_to_end_tests():
    """Patch ``requests.get`` to serve the four downloads of a full conversion.

    Successive calls to the patched ``requests.get`` return, in order:

    1. a response whose ``json()`` yields ``valid_cellbrowser_config``;
    2. a response whose ``content`` is the bytes of ``smaller_expr_matrix.tsv.gz``;
    3. a response whose ``content`` is the bytes of ``test_meta.tsv``;
    4. a response whose ``content`` is the bytes of ``test.coords.tsv.gz``.

    Yields:
        The mock standing in for ``requests.get``.
    """
    # Resolve the fixture files relative to this test module rather than the
    # current working directory, so the tests also pass when pytest is not
    # launched from the repository root.
    data_dir = join(os.path.dirname(os.path.abspath(__file__)), 'data')

    def file_response(filename):
        """Build a mock response whose ``content`` is the raw bytes of a fixture file."""
        response = Mock()
        with open(join(data_dir, filename), 'rb') as f:
            response.content = f.read()
        return response

    # Set up the Mock to return a fake response when called
    mock_response_json = Mock()
    mock_response_json.json.return_value = valid_cellbrowser_config

    # The fixture files are read before requests.get is patched, exactly as
    # before; only the way their paths are located has changed.
    responses = [
        mock_response_json,
        file_response('smaller_expr_matrix.tsv.gz'),
        file_response('test_meta.tsv'),
        file_response('test.coords.tsv.gz'),
    ]

    with patch('requests.get') as mock_get:
        mock_get.side_effect = responses
        yield mock_get


def test_download_valid_config():
    """``download_config()`` requests the project's dataset.json once and keeps the parsed config.

    The mocked ``requests.get`` returns ``valid_cellbrowser_config`` from
    ``json()``; the call must report success and the converter must expose
    that same config as ``cellbrowser_config``.
    """
    with patch('requests.get') as requests_get:
        requests_get.return_value.json.return_value = valid_cellbrowser_config
        converter = CellBrowserToAnndataZarrConverter(project_name, output_dir, False)
        config_ok = converter.download_config()

    # The mock keeps its call history after the patch is undone.
    requests_get.assert_called_once_with('https://cells.ucsc.edu/test-project/dataset.json')
    assert config_ok
    assert converter.cellbrowser_config == valid_cellbrowser_config


def test_filter_based_on_marker_genes(mock_end_to_end_tests, mock_filter_cells):
    """Building the AnnData object with the third constructor argument set to ``True``.

    Expects the resulting ``adata`` to have shape ``(8, 1)``, all four
    project files to have been requested, and ``scanpy.pp.filter_cells`` to
    have been invoked exactly once.
    """
    converter = CellBrowserToAnndataZarrConverter(project_name, output_dir, True)

    downloaded_ok = converter.download_config()
    assert downloaded_ok

    converter.create_anndata_object()
    assert converter.adata.shape == (8, 1)

    requested_urls = (
        "https://cells.ucsc.edu/test-project/dataset.json",
        "https://cells.ucsc.edu/test-project/exprMatrix.tsv.gz",
        "https://cells.ucsc.edu/test-project/meta.tsv",
        "https://cells.ucsc.edu/test-project/test.coords.tsv.gz",
    )
    for url in requested_urls:
        mock_end_to_end_tests.assert_any_call(url)

    assert mock_end_to_end_tests.call_count == 4
    assert mock_filter_cells.call_count == 1


def test_end_to_end(mock_makedirs, mock_write_zarr, mock_filter_cells, mock_end_to_end_tests):
    """Happy path of ``write_to_AnndataZarr_store`` with every download mocked.

    Expects the four project files to be requested, ``filter_cells`` to run
    once, the output directory to be created once, and the AnnData object to
    be written once to ``<output_dir>/<project_name>/out.adata.zarr``.
    """
    write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

    requested_urls = (
        "https://cells.ucsc.edu/test-project/dataset.json",
        "https://cells.ucsc.edu/test-project/exprMatrix.tsv.gz",
        "https://cells.ucsc.edu/test-project/meta.tsv",
        "https://cells.ucsc.edu/test-project/test.coords.tsv.gz",
    )
    for url in requested_urls:
        mock_end_to_end_tests.assert_any_call(url)

    assert mock_end_to_end_tests.call_count == 4
    assert mock_filter_cells.call_count == 1

    project_dir = join(output_dir, project_name)
    mock_makedirs.assert_called_once_with(os.path.dirname(project_dir), exist_ok=True)

    zarr_path = join(output_dir, project_name, "out.adata.zarr")
    mock_write_zarr.assert_called_once_with(zarr_path, chunks=[8, VAR_CHUNK_SIZE])


def test_end_to_end_invalid_config(mock_makedirs, mock_write_zarr, mock_filter_cells):
    """An invalid dataset.json payload makes ``write_to_AnndataZarr_store`` raise ``ValueError``.

    Only the config is requested; no directory is created, nothing is
    written, and ``filter_cells`` is never called.
    """
    with patch('requests.get') as requests_get:
        requests_get.return_value.json.return_value = invalid_cellbrowser_config
        with pytest.raises(ValueError):
            write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

    # Checked after the patch is undone; the mock retains its call records.
    requests_get.assert_called_once_with("https://cells.ucsc.edu/test-project/dataset.json")
    assert requests_get.call_count == 1

    for untouched in (mock_makedirs, mock_write_zarr, mock_filter_cells):
        assert untouched.call_count == 0


def test_end_to_end_download_config_raises_exception(mock_makedirs, mock_write_zarr, mock_filter_cells):
    """An error from ``raise_for_status()`` on the config response propagates to the caller.

    Only the config is requested; no directory is created, nothing is
    written, and ``filter_cells`` is never called.
    """
    failing_response = Mock()
    failing_response.raise_for_status.side_effect = Exception("Error downloading file")

    with patch('requests.get') as requests_get:
        requests_get.return_value = failing_response
        with pytest.raises(Exception):
            write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

    # Checked after the patch is undone; the mock retains its call records.
    requests_get.assert_called_once_with("https://cells.ucsc.edu/test-project/dataset.json")
    assert requests_get.call_count == 1

    for untouched in (mock_makedirs, mock_write_zarr, mock_filter_cells):
        assert untouched.call_count == 0


def test_end_to_end_load_expr_matrix_raises_exception(mock_makedirs, mock_write_zarr, mock_filter_cells):
    """An error from ``raise_for_status()`` on the expression-matrix response propagates.

    The config and the expression matrix are the only two requests made; no
    directory is created, nothing is written, and ``filter_cells`` is never
    called.
    """
    config_response = Mock()
    config_response.json.return_value = valid_cellbrowser_config

    failing_matrix_response = Mock()
    failing_matrix_response.raise_for_status.side_effect = Exception("Error downloading file")

    with patch('requests.get') as requests_get:
        requests_get.side_effect = [config_response, failing_matrix_response]
        with pytest.raises(Exception):
            write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

    # Checked after the patch is undone; the mock retains its call records.
    for url in (
        "https://cells.ucsc.edu/test-project/dataset.json",
        "https://cells.ucsc.edu/test-project/exprMatrix.tsv.gz",
    ):
        requests_get.assert_any_call(url)
    assert requests_get.call_count == 2

    for untouched in (mock_makedirs, mock_write_zarr, mock_filter_cells):
        assert untouched.call_count == 0


def test_end_to_end_load_cell_metadata_raises_exception(mock_makedirs, mock_write_zarr, mock_filter_cells):
    """An error from ``raise_for_status()`` on the metadata response propagates.

    The config and expression matrix download succeed; the third request
    (``meta.tsv``) fails. Exactly three requests are made; no directory is
    created, nothing is written, and ``filter_cells`` is never called.
    """
    # Resolve the fixture file relative to this test module rather than the
    # current working directory, so the test also passes when pytest is not
    # launched from the repository root.
    data_dir = join(os.path.dirname(os.path.abspath(__file__)), 'data')

    mock_get_config = Mock()
    mock_get_config.json.return_value = valid_cellbrowser_config

    with open(join(data_dir, 'smaller_expr_matrix.tsv.gz'), 'rb') as f:
        mock_response_expr_matrix = Mock()
        mock_response_expr_matrix.content = f.read()
        mock_response_expr_matrix.raise_for_status.return_value = None

    mock_response_meta = Mock()
    mock_response_meta.raise_for_status.side_effect = Exception("Error downloading file")

    # Baseline: none of the patched functions has been called before the
    # conversion starts.
    assert mock_makedirs.call_count == 0
    assert mock_write_zarr.call_count == 0
    assert mock_filter_cells.call_count == 0

    with patch('requests.get') as mock_get:
        mock_get.side_effect = [mock_get_config, mock_response_expr_matrix, mock_response_meta]
        with pytest.raises(Exception):
            write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/dataset.json")
        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/exprMatrix.tsv.gz")
        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/meta.tsv")
        assert mock_get.call_count == 3
        assert mock_makedirs.call_count == 0
        assert mock_write_zarr.call_count == 0
        assert mock_filter_cells.call_count == 0


def test_end_to_end_add_coords_raises_exception(mock_makedirs, mock_write_zarr, mock_filter_cells):
    """An error from ``raise_for_status()`` on the coordinates response propagates.

    The config, expression matrix and metadata download succeed; the fourth
    request (``test.coords.tsv.gz``) fails. Exactly four requests are made;
    no directory is created, nothing is written, and ``filter_cells`` is
    never called.
    """
    # Resolve the fixture files relative to this test module rather than the
    # current working directory, so the test also passes when pytest is not
    # launched from the repository root.
    data_dir = join(os.path.dirname(os.path.abspath(__file__)), 'data')

    mock_get_config = Mock()
    mock_get_config.json.return_value = valid_cellbrowser_config

    with open(join(data_dir, 'smaller_expr_matrix.tsv.gz'), 'rb') as f:
        mock_response_expr_matrix = Mock()
        mock_response_expr_matrix.content = f.read()
        mock_response_expr_matrix.raise_for_status.return_value = None

    with open(join(data_dir, 'test_meta.tsv'), 'rb') as f:
        mock_response_meta = Mock()
        mock_response_meta.content = f.read()
        mock_response_meta.raise_for_status.return_value = None

    mock_coords = Mock()
    mock_coords.raise_for_status.side_effect = Exception("Error downloading file")

    with patch('requests.get') as mock_get:
        mock_get.side_effect = [mock_get_config, mock_response_expr_matrix, mock_response_meta, mock_coords]
        with pytest.raises(Exception):
            write_to_AnndataZarr_store(project_name, output_dir, keep_only_marker_genes=False)

        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/dataset.json")
        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/exprMatrix.tsv.gz")
        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/meta.tsv")
        mock_get.assert_any_call("https://cells.ucsc.edu/test-project/test.coords.tsv.gz")
        assert mock_get.call_count == 4
        assert mock_makedirs.call_count == 0
        assert mock_write_zarr.call_count == 0
        assert mock_filter_cells.call_count == 0
6 changes: 6 additions & 0 deletions vitessce/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@
BASE_URL_PLACEHOLDER,
)

from .config_converter import (
CellBrowserToAnndataZarrConverter,
convert_to_vitessce_view_config,
write_to_AnndataZarr_store
)

from .wrappers import AbstractWrapper

# We allow installation without all of the dependencies that the widget requires.
Expand Down
Loading
Loading