Skip to content
164 changes: 164 additions & 0 deletions easybuild/tools/module_naming_scheme/generation_mns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
##
# Copyright 2016-2021 Ghent University
#
# This file is part of EasyBuild,
# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en),
# with support of Ghent University (http://ugent.be/hpc),
# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
# Flemish Research Foundation (FWO) (http://www.fwo.be/en)
# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
#
# https://github.com/easybuilders/easybuild
#
# EasyBuild is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation v2.
#
# EasyBuild is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EasyBuild. If not, see <http://www.gnu.org/licenses/>.
##
"""
Implementation of a different, generation-specific module naming scheme using release dates.
:author: Thomas Eylenbosch (Gluo N.V.)
:author: Thomas Soenen (B-square IT services)
:author: Alan O'Cais (CECAM)
"""

import os
import json

from easybuild.tools.module_naming_scheme.mns import ModuleNamingScheme
from easybuild.tools.build_log import EasyBuildError
from easybuild.tools.robot import search_easyconfigs
from easybuild.tools.config import ConfigurationVariables
from easybuild.framework.easyconfig.easyconfig import get_toolchain_hierarchy
from easybuild.tools.toolchain.toolchain import is_system_toolchain

# Name of the environment variable that may hold the path to a JSON file with custom
# generation-to-toolchain mappings; it is read when the naming scheme is instantiated.
GMNS_ENV = "GENERATION_MODULE_NAMING_SCHEME_LOOKUP_TABLE"


class GenerationModuleNamingScheme(ModuleNamingScheme):
"""Class implementing the generational module naming scheme."""

REQUIRED_KEYS = ['name', 'version', 'versionsuffix', 'toolchain']

def __init__(self):
    """
    Generate the lookup table that maps toolchains on foss generations.

    Generations (e.g. 2018a, 2020b) are derived from the names of the foss easyconfigs that are
    found, and dynamically mapped on toolchains using get_toolchain_hierarchy(). The lookup table
    can be extended by the user by pointing the environment variable named by GMNS_ENV to a file.

    The lookup table is a dict with toolchain-generation key-value pairs, for example
    {('GCC', '4.8.2'): '2016a'}, with toolchains represented as (name, version) tuples.

    JSON format of the file with custom mappings:
    {
      "2018b": [{"name": "GCC", "version": "5.2.0"}, {"name": "GCC", "version": "4.8.2"}],
      "2019b": [{"name": "GCC", "version": "5.2.4"}, {"name": "GCC", "version": "4.8.4"}]
    }

    :raises EasyBuildError: if the custom mapping file does not exist, can't be decoded as JSON,
                            or does not follow the format shown above
    """
    super().__init__()

    self.lookup_table = {}

    # Get all generations.
    # The pattern is a raw string: '\.' is not a valid string escape sequence, the backslash
    # has to be passed through literally as part of the search pattern.
    foss_filenames = search_easyconfigs(r"^foss-20[0-9]{2}[a-z]\.eb",
                                        filename_only=True,
                                        print_result=False)
    self.generations = [x.split('-')[1].split('.')[0] for x in foss_filenames]

    # get_toolchain_hierarchy() depends on ActiveMNS(), which can't point to
    # GenerationModuleNamingScheme to prevent circular reference errors. For that purpose, the MNS
    # that ActiveMNS() points to is temporarily switched to EasyBuildMNS while
    # get_toolchain_hierarchy() is used.
    config_dict = ConfigurationVariables()._FrozenDict__dict
    configured_mns = config_dict.get('module_naming_scheme', 'GenerationModuleNamingScheme')
    config_dict['module_naming_scheme'] = 'EasyBuildMNS'
    try:
        # map generations on toolchains
        for generation in self.generations:
            for tc in get_toolchain_hierarchy({'name': 'foss', 'version': generation}):
                self.lookup_table[(tc['name'], tc['version'])] = generation
            # include (foss, <generation>) as a toolchain as well
            self.lookup_table[('foss', generation)] = generation
    finally:
        # switch the configuration back to the MNS that was configured before the temporary
        # override, also when get_toolchain_hierarchy() failed
        config_dict['module_naming_scheme'] = configured_mns

    # users can provide custom generation-toolchain mapping through a file
    path = os.environ.get(GMNS_ENV)
    if path:
        if not os.path.isfile(path):
            msg = "value of ENV {} ({}) should be a valid filepath"
            raise EasyBuildError(msg.format(GMNS_ENV, path))

        # only keep the file open while parsing it, validation happens afterwards
        with open(path, 'r') as hc_lookup:
            try:
                hc_lookup_data = json.load(hc_lookup)
            except json.decoder.JSONDecodeError:
                raise EasyBuildError("{} can't be decoded as json".format(path))

        if not isinstance(hc_lookup_data, dict):
            raise EasyBuildError("{} should contain a dict".format(path))
        # custom mappings can only target generations for which a foss easyconfig was found
        if not set(hc_lookup_data.keys()) <= set(self.generations):
            raise EasyBuildError("Keys of {} should be generations".format(path))

        for generation, toolchains in hc_lookup_data.items():
            if not isinstance(toolchains, list):
                raise EasyBuildError("Values of {} should be lists".format(path))
            for tc in toolchains:
                if not isinstance(tc, dict):
                    msg = "Toolchains in {} should be of type dict"
                    raise EasyBuildError(msg.format(path))
                if set(tc.keys()) != {'name', 'version'}:
                    msg = "Toolchains in {} should have two keys ('name', 'version')"
                    raise EasyBuildError(msg.format(path))
                # custom entries take precedence over dynamically determined ones
                self.lookup_table[(tc['name'], tc['version'])] = generation

def det_full_module_name(self, ec):
    """
    Determine the full module name, relative to the top of the module path: the module
    subdirectory joined with the short module name.
    Examples: General/GCC/4.8.3, releases/2018b/OpenMPI/1.6.5
    """
    subdir = self.det_module_subdir(ec)
    short_name = self.det_short_module_name(ec)
    return os.path.join(subdir, short_name)

def det_short_module_name(self, ec):
    """
    Determine the short module name, i.e. the name under which the module is exposed to users:
    the software name joined with its full version.
    Examples: GCC/4.8.3, OpenMPI/1.6.5, OpenBLAS/0.2.9, HPL/2.1, Python/2.7.5
    """
    name = ec['name']
    full_version = self.det_full_version(ec)
    return os.path.join(name, full_version)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe that you can drop the toolchain/version suffix here without exposing yourself to lots of problems. How does this take into account that you can have, for example, OpenMPI with other compilers? Looking at a concrete example:
OpenMPI-4.0.3-GCC-9.3.0.eb
OpenMPI-4.0.3-gcccuda-2020a.eb
OpenMPI-4.0.3-iccifort-2020.1.217.eb
All of these map to the same module file, even though they are clearly not the same. This lack of uniqueness is a big problem, as EB will see them all as installed once the module file is created.

It seems that the MNS is by design focused on foss alone, which means it is heavily exposed to problems that may occur when mixing different toolchains (for example, Fortran module incompatibility between Intel/GCC). Even at foss level, the MNS relies on there being no shadowing of software with different toolchains (for example, Python with GCCcore and the same version with GCC); this is currently true in recent releases, but there is no guarantee that sites themselves respect this.


def det_full_version(self, ec):
    """Determine the full version: version prefix (if any), version and version suffix combined."""
    # a version prefix is not always defined (e.g., for toolchains), so fall back to an empty one
    prefix = ec.get('versionprefix', '')
    version = ec['version']
    suffix = ec['versionsuffix']
    return prefix + version + suffix

def det_module_subdir(self, ec):
    """
    Determine the subdirectory for the module file in $MODULEPATH. This determines the separation
    between module names exposed to users, and what's part of the $MODULEPATH.

    Software built with a system toolchain ends up in 'General'. Everything else ends up in
    'releases/<generation>', where the generation is looked up for the toolchain itself first,
    and for the toolchains in its hierarchy otherwise.

    :raises EasyBuildError: if no generation can be determined for a non-system toolchain
    """
    toolchain_name = ec['toolchain']['name']

    if is_system_toolchain(toolchain_name):
        top_dir, generation = 'General', ''
    else:
        top_dir = 'releases'
        toolchain_version = ec['toolchain']['version']

        # try the toolchain itself first; a missing (or empty) entry counts as "not mapped"
        generation = self.lookup_table.get((toolchain_name, toolchain_version))
        if not generation:
            # fall back to the first toolchain in the hierarchy that has a mapping
            hierarchy = get_toolchain_hierarchy({'name': toolchain_name,
                                                 'version': toolchain_version})
            mapped = (self.lookup_table.get((tc['name'], tc['version'])) for tc in hierarchy)
            generation = next((found for found in mapped if found), '')

        if generation == '':
            # NOTE: the two literals are joined without a space ("customtoolchain"); the message
            # is deliberately left untouched here since the test suite checks it literally
            msg = "Couldn't map software version ({}, {}) to a generation. Provide a custom" \
                  "toolchain mapping through {}"
            raise EasyBuildError(msg.format(ec['name'], ec['version'], GMNS_ENV))

    # strip the trailing separator that is left behind when there is no generation part
    return os.path.join(top_dir, generation).rstrip('/')
68 changes: 67 additions & 1 deletion test/framework/module_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import os
import re
import sys
import json
import tempfile
from distutils.version import LooseVersion
from unittest import TextTestRunner, TestSuite
Expand All @@ -48,7 +49,6 @@
from easybuild.tools.utilities import quote_str
from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered, find_full_path, init_config


class ModuleGeneratorTest(EnhancedTestCase):
"""Tests for module_generator module."""

Expand Down Expand Up @@ -1442,6 +1442,72 @@ def test_ec(ecfile, short_modname, mod_subdir, modpath_exts, user_modpath_exts,
for ecfile, mns_vals in test_ecs.items():
test_ec(ecfile, *mns_vals)

def test_generation_mns(self):
    """Test generation module naming scheme."""

    moduleclasses = ['base', 'compiler', 'mpi', 'numlib', 'system', 'toolchain']
    ecs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs')
    all_stops = [x[0] for x in EasyBlock.get_steps()]
    build_options = {
        'check_osdeps': False,
        'robot_path': [ecs_dir],
        'valid_stops': all_stops,
        'validate': False,
        'valid_module_classes': moduleclasses,
    }

    # write the custom toolchain-generation mapping to a unique temporary file rather than to a
    # fixed path in /tmp, so that parallel or repeated test runs can't interfere with each other
    gmns_hardcoded_data = {"2018a": [{"name": "GCC", "version": "4.9.2"}]}
    handle, lookup_table_path = tempfile.mkstemp(prefix='gmns_hardcoded_data_', suffix='.json')
    with os.fdopen(handle, 'w') as f:
        json.dump(gmns_hardcoded_data, f)

    def cleanup_lookup_table():
        """Remove the temporary lookup table file and the environment variable pointing to it."""
        os.environ.pop('GENERATION_MODULE_NAMING_SCHEME_LOOKUP_TABLE', None)
        # the file may already be gone if the temporary directory was cleaned up earlier
        if os.path.exists(lookup_table_path):
            os.remove(lookup_table_path)

    self.addCleanup(cleanup_lookup_table)

    os.environ['EASYBUILD_MODULE_NAMING_SCHEME'] = 'GenerationModuleNamingScheme'
    os.environ['GENERATION_MODULE_NAMING_SCHEME_LOOKUP_TABLE'] = lookup_table_path

    init_config(build_options=build_options)

    def test_ec(ecfile, short_modname, mod_subdir, modpath_exts, user_modpath_exts, init_modpaths):
        """Test whether active module naming scheme returns expected values."""
        ec = EasyConfig(glob.glob(os.path.join(ecs_dir, '*', '*', ecfile))[0])

        self.assertEqual(ActiveMNS().det_full_module_name(ec), os.path.join(mod_subdir, short_modname))
        self.assertEqual(ActiveMNS().det_short_module_name(ec), short_modname)
        self.assertEqual(ActiveMNS().det_module_subdir(ec), mod_subdir)
        self.assertEqual(ActiveMNS().det_modpath_extensions(ec), modpath_exts)
        self.assertEqual(ActiveMNS().det_user_modpath_extensions(ec), user_modpath_exts)
        self.assertEqual(ActiveMNS().det_init_modulepaths(ec), init_modpaths)

    # test examples that are resolved by the dynamically generated generation lookup table
    # format: easyconfig_file: (short_mod_name, mod_subdir, modpath_exts, user_modpath_exts, init_modpaths)
    test_ecs = {
        'OpenMPI-2.1.2-GCC-6.4.0-2.28.eb': ('OpenMPI/2.1.2', 'releases/2018a', [], [], []),
        'GCCcore-4.9.3.eb': ('GCCcore/4.9.3', 'General', [], [], []),
        'gcccuda-2018a.eb': ('gcccuda/2018a', 'General', [], [], []),
        'toy-0.0-gompi-2018a.eb': ('toy/0.0', 'releases/2018a', [], [], []),
        'foss-2018a.eb': ('foss/2018a', 'General', [], [], [])
    }

    for ecfile, mns_vals in test_ecs.items():
        test_ec(ecfile, *mns_vals)

    # test error for examples without toolchain-generation mapping in lookup table. EasyConfig() calls
    # det_module_subdir() of the GenerationModuleNamingScheme object for the toolchain (binutils)
    with self.assertRaises(EasyBuildError) as cm:
        EasyConfig(glob.glob(os.path.join(ecs_dir, '*', '*', 'hwloc-1.6.2-GCC-4.9.3-2.26.eb'))[0])

    msg = "Couldn't map software version (binutils, 2.26) to a generation. Provide a customtoolchain " \
          "mapping through GENERATION_MODULE_NAMING_SCHEME_LOOKUP_TABLE"
    self.assertIn(msg, cm.exception.args[0])

    # test lookup table extension with user-provided input. User-provided input (GCC 4.9.2 maps on 2018a)
    # is provided through a file during setup at the start of the test case.
    test_ecs_2 = {
        'bzip2-1.0.6-GCC-4.9.2.eb': ('bzip2/1.0.6', 'releases/2018a', [], [], [])
    }

    for ecfile, mns_vals in test_ecs_2.items():
        test_ec(ecfile, *mns_vals)

def test_dependencies_for(self):
"""Test for dependencies_for function."""
expected = [
Expand Down