Skip to content

File sources for gdrive, gcs, onedata, basespace #12500

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 21, 2021
Merged
12 changes: 12 additions & 0 deletions lib/galaxy/dependencies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,18 @@ def check_s3fs(self):
# use s3fs directly (skipping pyfilesystem) for direct access to more options
return 's3fs' in self.file_sources

def check_fs_googledrivefs(self):
    """Return True when 'googledrive' is present in ``self.file_sources``."""
    configured = self.file_sources
    return 'googledrive' in configured

def check_fs_gcsfs(self):
    """Return True when 'googlecloudstorage' is present in ``self.file_sources``."""
    configured = self.file_sources
    return 'googlecloudstorage' in configured

def check_fs_onedatafs(self):
    """Return True when 'onedata' is present in ``self.file_sources``."""
    configured = self.file_sources
    return 'onedata' in configured

def check_fs_basespace(self):
    """Return True when 'basespace' is present in ``self.file_sources``."""
    configured = self.file_sources
    return 'basespace' in configured

def check_watchdog(self):
install_set = {'auto', 'True', 'true', 'polling', True}
return (self.config['watch_tools'] in install_set
Expand Down
4 changes: 4 additions & 0 deletions lib/galaxy/dependencies/conditional-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ fs.sshfs # type: ssh
fs-s3fs # type: s3
s3fs # type: s3fs
fs.anvilfs # type: anvil
fs.googledrivefs # type: googledrive
fs-gcsfs # type: googlecloudstorage
fs-onedatafs # type: onedata
fs-basespace # type: basespace

# Chronos client
chronos-python==1.2.1
Expand Down
20 changes: 20 additions & 0 deletions lib/galaxy/files/sources/basespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
try:
from fs_basespace import BASESPACEFS
except ImportError:
BASESPACEFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class BaseSpaceFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``basespace`` built on ``BASESPACEFS``."""

    plugin_type = 'basespace'
    # BASESPACEFS is None when the optional fs_basespace import failed.
    required_module = BASESPACEFS
    required_package = "fs-basespace"

    def _open_fs(self, user_context):
        """Open a BASESPACEFS handle using this source's serialized properties."""
        fs_kwargs = self._serialization_props(user_context)
        return BASESPACEFS(**fs_kwargs)


__all__ = ('BaseSpaceFilesSource',)
30 changes: 30 additions & 0 deletions lib/galaxy/files/sources/googlecloudstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
try:
from fs_gcsfs import GCSFS
from google.cloud.storage import Client
from google.oauth2.credentials import Credentials
except ImportError:
GCSFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class GoogleCloudStorageFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``googlecloudstorage`` built on ``GCSFS``."""

    plugin_type = 'googlecloudstorage'
    # GCSFS is None when the optional fs_gcsfs / google imports failed.
    required_module = GCSFS
    required_package = "fs-gcsfs"

    def _open_fs(self, user_context):
        """Open a GCSFS handle for the configured bucket.

        The serialized properties are split into the options consumed here
        (``bucket_name``, ``root_path``, ``project``, ``anonymous``) and the
        remaining entries, which are forwarded verbatim to
        ``google.oauth2.credentials.Credentials`` when a ``token`` is present.

        With ``anonymous`` set an anonymous storage client is used; with a
        ``token`` an authenticated client is built; otherwise no client is
        passed to GCSFS.
        """
        props = self._serialization_props(user_context)
        bucket_name = props.pop('bucket_name', None)
        root_path = props.pop('root_path', None)
        project = props.pop('project', None)
        # 'anonymous' is an option of this file source, not a Credentials
        # argument; remove it so a falsy value (e.g. ``anonymous: false``) is
        # not forwarded to Credentials(**props), which would raise TypeError.
        anonymous = props.pop('anonymous', False)
        args = {}
        if anonymous:
            args['client'] = Client.create_anonymous_client()
        elif props.get('token'):
            args['client'] = Client(project=project, credentials=Credentials(**props))
        return GCSFS(bucket_name, root_path=root_path, retry=0, **args)


__all__ = ('GoogleCloudStorageFilesSource',)
22 changes: 22 additions & 0 deletions lib/galaxy/files/sources/googledrive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
try:
from fs.googledrivefs import GoogleDriveFS
from google.oauth2.credentials import Credentials
except ImportError:
GoogleDriveFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class GoogleDriveFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``googledrive`` built on ``GoogleDriveFS``."""

    plugin_type = 'googledrive'
    # GoogleDriveFS is None when the optional imports failed.
    required_module = GoogleDriveFS
    required_package = "fs.googledrivefs"

    def _open_fs(self, user_context):
        """Open a GoogleDriveFS handle.

        All serialized properties are passed as keyword arguments to
        ``Credentials``; the resulting credentials object is handed to
        ``GoogleDriveFS``.
        """
        credential_kwargs = self._serialization_props(user_context)
        return GoogleDriveFS(Credentials(**credential_kwargs))


__all__ = ('GoogleDriveFilesSource',)
20 changes: 20 additions & 0 deletions lib/galaxy/files/sources/onedata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
try:
from fs.onedatafs import OnedataFS
except ImportError:
OnedataFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class OneDataFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``onedata`` built on ``OnedataFS``."""

    plugin_type = 'onedata'
    # OnedataFS is None when the optional fs.onedatafs import failed.
    required_module = OnedataFS
    required_package = "fs-onedatafs"

    def _open_fs(self, user_context):
        """Open an OnedataFS handle using this source's serialized properties."""
        fs_kwargs = self._serialization_props(user_context)
        return OnedataFS(**fs_kwargs)


__all__ = ('OneDataFilesSource',)
1 change: 1 addition & 0 deletions packages/files/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pytest
fs-gcsfs
88 changes: 63 additions & 25 deletions test/unit/files/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from galaxy.files import (
ConfiguredFileSources,
ConfiguredFileSourcesConfig,
DictFileSourcesUserContext,
)

Expand Down Expand Up @@ -55,6 +56,21 @@ def user_context_fixture(user_ftp_dir=None, role_names=None, group_names=None, i
preferences={
'webdav|password': 'secret1234',
'dropbox|access_token': os.environ.get('GALAXY_TEST_DROPBOX_ACCESS_TOKEN'),
'googledrive|client_id': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_CLIENT_ID'),
'googledrive|client_secret': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_CLIENT_SECRET'),
'googledrive|access_token': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_ACCESS_TOKEN'),
'googledrive|refresh_token': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_REFRESH_TOKEN'),
'googlecloudstorage|project': os.environ.get('GALAXY_TEST_GCS_PROJECT'),
'googlecloudstorage|bucket_name': os.environ.get('GALAXY_TEST_GCS_BUCKET'),
'googlecloudstorage|client_id': os.environ.get('GALAXY_TEST_GCS_CLIENT_ID'),
'googlecloudstorage|client_secret': os.environ.get('GALAXY_TEST_GCS_CLIENT_SECRET'),
'googlecloudstorage|access_token': os.environ.get('GALAXY_TEST_GCS_ACCESS_TOKEN'),
'googlecloudstorage|refresh_token': os.environ.get('GALAXY_TEST_GCS_REFRESH_TOKEN'),
'onedata|provider_host': os.environ.get('GALAXY_TEST_ONEDATA_PROVIDER_HOST'),
'onedata|access_token': os.environ.get('GALAXY_TEST_ONEDATA_ACCESS_TOKEN'),
'basespace|client_id': os.environ.get('GALAXY_TEST_ONEDATA_CLIENT_ID'),
'basespace|client_secret': os.environ.get('GALAXY_TEST_ONEDATA_CLIENT_SECRET'),
'basespace|access_token': os.environ.get('GALAXY_TEST_ONEDATA_ACCESS_TOKEN'),
},
role_names=role_names or set(),
group_names=group_names or set(),
Expand All @@ -63,43 +79,43 @@ def user_context_fixture(user_ftp_dir=None, role_names=None, group_names=None, i
return user_context


def realize_to_temp_file(file_sources, uri, user_context=None):
    """Realize ``uri`` into a temporary file and return the file's text.

    :param file_sources: object providing ``get_file_source_path(uri)``; the
        result must expose ``.path`` and ``.file_source``.
    :param uri: URI to resolve and realize.
    :param user_context: forwarded unchanged to ``realize_to``.
    :returns: the contents read back from the temporary file.

    Any exception raised while resolving or realizing propagates to the caller.
    """
    file_source_path = file_sources.get_file_source_path(uri)
    with tempfile.NamedTemporaryFile(mode='r') as temp:
        file_source_path.file_source.realize_to(file_source_path.path, temp.name, user_context=user_context)
        # Re-open by name so the contents written to that path by realize_to
        # are read through a fresh handle.
        with open(temp.name) as f:
            return f.read()


def assert_realizes_as(file_sources, uri, expected, user_context=None):
    """Fail unless realizing ``uri`` yields exactly ``expected``.

    Raises AssertionError describing the URI, the expected text and the text
    that was actually realized when they differ.
    """
    actual = realize_to_temp_file(file_sources, uri, user_context=user_context)
    if actual == expected:
        return
    template = "Expected to realize contents at [{}] as [{}], instead found [{}]"
    raise AssertionError(template.format(uri, expected, actual))


def assert_realizes_contains(file_sources, uri, expected, user_context=None):
    """Fail unless the contents realized from ``uri`` contain ``expected``.

    Raises AssertionError describing the URI, the expected fragment and the
    text that was actually realized when the fragment is absent.
    """
    realized_contents = realize_to_temp_file(file_sources, uri, user_context=user_context)
    if expected not in realized_contents:
        message = "Expected to realize contents at [{}] to contain [{}], instead found [{}]".format(
            uri,
            expected,
            realized_contents,
        )
        raise AssertionError(message)


def assert_realizes_throws_exception(file_sources, uri, user_context=None) -> Exception:
    """Realize ``uri`` and return the exception that realization raises.

    Raises AssertionError when realization completes without raising.
    """
    try:
        realize_to_temp_file(file_sources, uri, user_context=user_context)
    except Exception as e:
        # Deliberately broad: callers inspect the returned exception themselves.
        return e
    # Explicit raise (rather than a bare ``assert``) so the check gives a
    # useful message and is not stripped when Python runs with -O.
    raise AssertionError(f"Expected realizing [{uri}] to raise an exception, but it succeeded")

Expand All @@ -110,3 +126,25 @@ def write_from(file_sources, uri, content, user_context=None):
f.write(content)
f.flush()
file_source_path.file_source.write_from(file_source_path.path, f.name, user_context=user_context)


def configured_file_sources(conf_file):
    """Build a ConfiguredFileSources from ``conf_file`` with a default config object."""
    return ConfiguredFileSources(ConfiguredFileSourcesConfig(), conf_file=conf_file)


def assert_simple_file_realize(conf_file, recursive=False, filename="a", contents="a\n", contains=False):
    """List the root of file source ``test1`` and verify one file realizes as expected.

    :param conf_file: file sources configuration to load.
    :param recursive: passed to the file source's ``list`` call.
    :param filename: name of the file expected in the listing.
    :param contents: expected text; compared exactly, or as a substring when
        ``contains`` is true.
    :param contains: choose substring matching instead of exact matching.
    """
    user_context = user_context_fixture()
    file_sources = configured_file_sources(conf_file)

    root = file_sources.get_file_source_path("gxfiles://test1")
    assert root.path == "/"

    listing = root.file_source.list("/", recursive=recursive, user_context=user_context)
    assert find(listing, class_="File", name=filename)

    target_uri = f"gxfiles://test1/{filename}"
    check = assert_realizes_contains if contains else assert_realizes_as
    check(file_sources, target_uri, contents, user_context=user_context)
7 changes: 7 additions & 0 deletions test/unit/files/basespace_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- type: basespace
id: test1
doc: Test access to Illumina BaseSpace
basespace_server: https://api.basespace.illumina.com
client_id: ${user.preferences['basespace|client_id']}
client_secret: ${user.preferences['basespace|client_secret']}
access_token: ${user.preferences['basespace|access_token']}
2 changes: 1 addition & 1 deletion test/unit/files/dropbox_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- type: dropbox
id: test1
doc: Test WebDAV server.
doc: Test access to a dropbox account.
accessToken: ${user.preferences['dropbox|access_token']}
11 changes: 11 additions & 0 deletions test/unit/files/gcsfs_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
- type: googlecloudstorage
id: test1
doc: Test access to Google Cloud Storage.
project: ${user.preferences['googlecloudstorage|project']}
bucket_name: 'genomics-public-data'
token_uri: "https://www.googleapis.com/oauth2/v4/token"
client_id: ${user.preferences['googlecloudstorage|client_id']}
client_secret: ${user.preferences['googlecloudstorage|client_secret']}
token: ${user.preferences['googlecloudstorage|access_token']}
refresh_token: ${user.preferences['googlecloudstorage|refresh_token']}
anonymous: true
8 changes: 8 additions & 0 deletions test/unit/files/googledrive_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- type: googledrive
id: test1
doc: Test access to a Google drive.
token: ${user.preferences['googledrive|access_token']}
refresh_token: ${user.preferences['googledrive|refresh_token']}
token_uri: "https://www.googleapis.com/oauth2/v4/token"
client_id: ${user.preferences['googledrive|client_id']}
client_secret: ${user.preferences['googledrive|client_secret']}
5 changes: 5 additions & 0 deletions test/unit/files/onedata_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- type: onedata
id: test1
doc: Test access to a OneData host
provider_host: ${user.preferences['onedata|provider_host']}
access_token: ${user.preferences['onedata|access_token']}
36 changes: 36 additions & 0 deletions test/unit/files/test_basespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os

import pytest

from ._util import (
assert_realizes_as,
configured_file_sources,
find,
user_context_fixture,
)
SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "basespace_file_sources_conf.yml")

# Skip unless every BaseSpace-related environment variable is set to a non-empty value.
skip_if_no_basespace_access_token = pytest.mark.skipif(
    not all(
        os.environ.get(name)
        for name in (
            'GALAXY_TEST_BASESPACE_CLIENT_ID',
            'GALAXY_TEST_BASESPACE_CLIENT_SECRET',
            'GALAXY_TEST_BASESPACE_ACCESS_TOKEN',
            'GALAXY_TEST_BASESPACE_TEST_FILE_PATH',
        )
    ),
    reason="GALAXY_TEST_BASESPACE_CLIENT_ID and related vars not set"
)


@skip_if_no_basespace_access_token
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The added unit tests all use the same test structure; can you parameterize them?

Copy link
Member Author

@nuwang nuwang Dec 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think they can be. That's generally the case for many of the file source tests. Maybe that is a refactoring that should be done outside of this PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you at least push _configured_file_sources into the helper? It is literally the same in all of those tests.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have refactored the tests and helpers. Can you take a look?

def test_file_source():
    """List the directory of the configured BaseSpace test file and realize that file.

    The file path comes from GALAXY_TEST_BASESPACE_TEST_FILE_PATH; the file is
    expected to realize as "a\\n".
    """
    user_context = user_context_fixture()
    file_sources = configured_file_sources(FILE_SOURCES_CONF)

    root = file_sources.get_file_source_path("gxfiles://test1")
    assert root.path == "/"

    remote_path = os.environ.get('GALAXY_TEST_BASESPACE_TEST_FILE_PATH', "")
    parent_dir, base_name = os.path.split(remote_path)
    listing = root.file_source.list(parent_dir, recursive=False, user_context=user_context)
    a_file = find(listing, class_="File", name=base_name)
    assert a_file

    assert_realizes_as(file_sources, a_file['uri'], "a\n", user_context=user_context)
25 changes: 3 additions & 22 deletions test/unit/files/test_dropbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@

import pytest

from galaxy.files import ConfiguredFileSources, ConfiguredFileSourcesConfig
from ._util import (
assert_realizes_as,
find_file_a,
user_context_fixture,
)
from ._util import assert_simple_file_realize

SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "dropbox_file_sources_conf.yml")

Expand All @@ -19,19 +15,4 @@

@skip_if_no_dropbox_access_token
def test_file_source():
    """Check that file "a" in the Dropbox test source is listed (recursively) and realizes as "a\\n"."""
    assert_simple_file_realize(FILE_SOURCES_CONF, recursive=True)
25 changes: 25 additions & 0 deletions test/unit/files/test_gcsfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

import pytest

from ._util import assert_simple_file_realize

try:
from fs_gcsfs import GCSFS
except ImportError:
GCSFS = None

SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "gcsfs_file_sources_conf.yml")


# GCSFS is None when the optional fs_gcsfs import above failed.
skip_if_no_gcsfs_libs = pytest.mark.skipif(
    GCSFS is None,
    reason="Required lib to run gcs file source test: fs_gcsfs is not available",
)


@skip_if_no_gcsfs_libs
def test_file_source():
    """Check that README in the configured bucket is listed and contains "1000genomes"."""
    expectations = dict(
        recursive=False,
        filename="README",
        contents="1000genomes",
        contains=True,
    )
    assert_simple_file_realize(FILE_SOURCES_CONF, **expectations)
Loading