Skip to content

Added support for bioconductor packages #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 23 additions & 15 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
from easybuild.framework.easyconfig.format.format import SANITY_CHECK_PATHS_DIRS, SANITY_CHECK_PATHS_FILES
from easybuild.framework.easyconfig.parser import fetch_parameters_from_easyconfig
from easybuild.framework.easyconfig.style import MAX_LINE_LENGTH
from easybuild.framework.easyconfig.tools import dump_env_easyblock, get_paths_for, get_pkg_metadata, clean_pkg_metadata
from easybuild.framework.easyconfig.tools import dump_env_easyblock, get_paths_for, get_pkg_metadata, get_pkg_as_extension
from easybuild.framework.easyconfig.templates import TEMPLATE_NAMES_EASYBLOCK_RUN_STEP, template_constant_dict
from easybuild.framework.extension import Extension, resolve_exts_filter_template
from easybuild.tools import LooseVersion, config, run
Expand Down Expand Up @@ -283,6 +283,8 @@ def __init__(self, ec):
if self.dry_run:
self.init_dry_run()

self.bioconductor_version = fetch_parameters_from_easyconfig(self.cfg.rawtxt, ["local_biocver"])[0]

self.log.info("Init completed for application name %s version %s" % (self.name, self.version))

def post_init(self):
Expand Down Expand Up @@ -2875,42 +2877,48 @@ def get_updated_exts_list(self):
# init variables
updated_exts_list = []

if self.bioconductor_version:
print_msg("Using Bioconductor v%s...\n" % (self.bioconductor_version), log=_log)
else:
print_msg("local_biocver parameter not set in easyconfig. Bioconductor packages will not be considered.\n", log=_log)

# loop over all extensions and update their version
for ext in self.exts:

# get package information
ext_class = self.cfg.get('exts_defaultclass', None)
ext_name = ext.get('name', None)
ext_version = ext.get('version', None)

# get metadata of the latest version of the extension
metadata = get_pkg_metadata(pkg_class=self.cfg.get('exts_defaultclass', None),
pkg_name=ext.get('name', None))
metadata = get_pkg_metadata(pkg_class=ext_class,
pkg_name=ext_name,
pkg_version=None,
bioc_version=self.bioconductor_version)

# process the metadata
if metadata:

# build the package
pkg = {"name": metadata['name'],
"version": metadata['version'],
"options": {"checksums": [metadata['checksum']]}}

# clean the package metadata values
clean_pkg_metadata(pkg)
new_ext = get_pkg_as_extension(ext_class, metadata)

# store the updated extension
updated_exts_list.append(pkg)
updated_exts_list.append(new_ext)

# print message to the user
if ext['version'] == pkg['version']:
if ext_version == new_ext['version']:
print_msg(
f"Package {ext['name']:<{PKG_NAME_OFFSET}} v{ext['version']:<{PKG_VERSION_OFFSET}} {'up-to-date':<{INFO_OFFSET}}", log=_log)
f"Package {ext_name:<{PKG_NAME_OFFSET}} v{ext_version:<{PKG_VERSION_OFFSET}} {'up-to-date':<{INFO_OFFSET}}", log=_log)
else:
print_msg(
f"Package {ext['name']:<{PKG_NAME_OFFSET}} v{ext['version']:<{PKG_VERSION_OFFSET}} updated to {pkg['version']:<{INFO_OFFSET}}", log=_log)
f"Package {ext_name:<{PKG_NAME_OFFSET}} v{ext_version:<{PKG_VERSION_OFFSET}} updated to {new_ext['version']:<{INFO_OFFSET}}", log=_log)

else:
# no metadata found, therefore store the original extension
updated_exts_list.append(ext)

# print message to the user
print_msg(
f"Package {ext['name']:<{PKG_NAME_OFFSET}} v{ext['version']:<{PKG_VERSION_OFFSET}} {'info not found':<{INFO_OFFSET}}", log=_log)
f"Package {ext_name:<{PKG_NAME_OFFSET}} v{ext_version:<{PKG_VERSION_OFFSET}} {'info not found':<{INFO_OFFSET}}", log=_log)

# aesthetic print
print()
Expand Down
137 changes: 94 additions & 43 deletions easybuild/framework/easyconfig/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,17 @@
from easybuild.tools.utilities import only_if_module_is_available, quote_str
from easybuild.tools.version import VERSION as EASYBUILD_VERSION

# URLs for package repositories
CRANDB_URL = "https://crandb.r-pkg.org"
PYPI_URL = "https://pypi.org/pypi"
BIOCONDUCTOR_URL = "https://bioconductor.org/packages/json"
BIOCONDUCTOR_PKGS_URL = "bioc/packages.json"
BIOCONDUCTOR_ANNOTATION_URL = "data/annotation/packages.json"
BIOCONDUCTOR_EXPERIMENT_URL = "data/experiment/packages.json"

# Global variable to store Bioconductor packages
bioc_packages_cache = None


# optional Python packages, these might be missing
# failing imports are just ignored
Expand Down Expand Up @@ -936,25 +945,72 @@ def get_python_package_checksum(pkg_metadata, pkg_version):
return checksum


def get_pkg_metadata(pkg_class, pkg_name, pkg_version=None):
def get_bioconductor_packages(bioc_version):
    """
    Get the Bioconductor packages available for the given Bioconductor version.

    The software, annotation and experiment package indexes are downloaded and merged into
    a single dictionary. The result is stored in the module-level bioc_packages_cache, so the
    indexes are only downloaded again when a different Bioconductor version is requested.

    Downloading is best-effort: an index that cannot be retrieved only triggers a warning,
    and the packages of the remaining indexes are still returned.

    :param bioc_version: Bioconductor version
    :return: dictionary with the merged content of the package indexes,
             or None if no Bioconductor version was provided
    """

    # global variable to store the bioconductor packages
    global bioc_packages_cache

    # check if bioconductor version has been provided
    if not bioc_version:
        return None

    # the cached packages are only valid for the version they were downloaded for;
    # that version is remembered on the function object itself
    cached_version = getattr(get_bioconductor_packages, 'cached_version', None)
    if bioc_packages_cache is not None and cached_version == bioc_version:
        return bioc_packages_cache

    # seconds to wait for the server before giving up, so a stalled connection cannot hang the session
    request_timeout = 30

    # relative locations of the package indexes to merge
    index_paths = (BIOCONDUCTOR_PKGS_URL, BIOCONDUCTOR_ANNOTATION_URL, BIOCONDUCTOR_EXPERIMENT_URL)

    # retrieve packages from the cloud
    packages = {}
    for index_path in index_paths:
        url = '%s/%s/%s' % (BIOCONDUCTOR_URL, bioc_version, index_path)
        try:
            response = requests.get(url, timeout=request_timeout)

            if response.status_code == 200:
                packages.update(response.json())
            else:
                print_warning(
                    f"Failed to get bioconductor packages from {url}: HTTP status: {response.status_code}")
        except Exception as err:
            # deliberately broad: any failure for one index must not prevent using the other indexes
            print_warning(f"Exception while getting bioconductor packages from {url}: {err}")

    # store the result, together with the version it belongs to
    bioc_packages_cache = packages
    get_bioconductor_packages.cached_version = bioc_version

    return bioc_packages_cache


def get_pkg_metadata(pkg_class, pkg_name, pkg_version=None, bioc_version=None):
"""
Get the metadata of the given package

:param pkg_class: package class (RPackage, PythonPackage, PerlPackage)
:param pkg_name: package name
:param pkg_version: package version. If None, the latest version will be retrieved.
:param bioc_version: bioconductor version
"""

# initialize variable
# initialize variables
pkg_metadata = None
bioc_packages = None

# build the db url to get the metadata from
# build the url to get the metadata from the database
if pkg_class == "RPackage":
if pkg_version:
url = "%s/%s/%s" % (CRANDB_URL, pkg_name, pkg_version)
else:
url = "%s/%s" % (CRANDB_URL, pkg_name)

# get bioc packages if bioconductor version is provided
if bioc_version:
bioc_packages = get_bioconductor_packages(bioc_version)

elif pkg_class == "PythonPackage":
url = "%s/%s/json" % (PYPI_URL, pkg_name)

Expand All @@ -971,55 +1027,50 @@ def get_pkg_metadata(pkg_class, pkg_name, pkg_version=None):
except Exception as err:
print_warning("Exception while getting metadata for extension %s: %s" % (pkg_name, err))

if pkg_metadata:

if pkg_class == "RPackage":
name = pkg_metadata.get('Package', '')
version = pkg_metadata.get('Version', '')
checksum = pkg_metadata.get('MD5sum', '')

elif pkg_class == "PythonPackage":
name = pkg_metadata.get('info', {}).get('name', '')
version = pkg_metadata.get('info', {}).get('version', '')
checksum = get_python_package_checksum(pkg_metadata, version)

else:
raise EasyBuildError("exts_defaultclass %s not supported" % pkg_class)

pkg_metadata = {"name": name, "version": version, "checksum": checksum}
# if the package is not found in the database, then check if it is a bioconductor package
if not pkg_metadata and bioc_packages:
# iterate over bioconductor packages to find the package
for package in bioc_packages.items():
if package[0] == pkg_name:
pkg_metadata = package[1]
break

return pkg_metadata


def clean_pkg_metadata(pkg):
def get_pkg_as_extension(pkg_class, pkg_metadata):
"""
Clean the name, version and checksum fields of the given package.
Get the package as an extension

:param pkg: extension data
:param pkg_class: package class (RPackage, PythonPackage, PerlPackage)
:param pkg_metadata: package metadata
"""

# if not package provided, then do nothing
if not pkg:
# if no metadata is provided, return None
if not pkg_metadata:
return None

# list of allowed characters in the version field
# check the package class and parse the metadata accordingly
if pkg_class == "RPackage":
name = pkg_metadata.get('Package', '')
version = pkg_metadata.get('Version', '')
checksum = pkg_metadata.get('MD5sum', '')

elif pkg_class == "PythonPackage":
name = pkg_metadata.get('info', {}).get('name', '')
version = pkg_metadata.get('info', {}).get('version', '')
checksum = get_python_package_checksum(pkg_metadata, version)

else:
raise EasyBuildError("exts_defaultclass %s not supported" % pkg_class)

# remove any non-alphanumeric characters from the version
allowed_version_chars = r'[^0-9><=!*. \-]'
version = re.sub(allowed_version_chars, '', version)

# remove any new line characters
name = name.replace('\n', '')
version = version.replace('\n', '')
checksum = checksum.replace('\n', '')

# some dependencies have an awkward format and name and version need to be parsed
# regular expression pattern to match names like 'RSQLite (>= 2.0)'
pattern = r'^(?P<name>[^\s]+) \((?P<info>.+)\)$'
match = re.match(pattern, pkg['name'])
if match:
pkg['name'] = match.group('name')
pkg['version'] = match.group('info')

# Remove any non-alphanumeric characters from the version
if pkg['version']:
pkg['version'] = re.sub(allowed_version_chars, '', pkg['version'])

# remove any new line characters from the name, version and checksum
pkg['name'] = pkg['name'].replace('\n', '')
pkg['version'] = pkg['version'].replace('\n', '')
checksum = pkg['options']['checksums']
if checksum:
pkg['options']['checksums'] = [checksum[0].replace('\n', '')]
return {"name": name, "version": version, "options": {"checksums": [checksum]}}