Change the naming convention for SIMD defines and modes

This is a follow-up to #221 and PR #421.

The SIMD modes we support were initially designed for Intel and use an Intel naming convention, both in their short tag descriptions and especially in their ifdefs in the code. Non-Intel platforms are supported using the ugly hack of adding Intel-like defines in the code instead of using native ones (namely, Power9 VSX and ARM Neon, both 128-bit, are supported by artificially adding an `__SSE4_2__` define).

This should be improved in one or maybe two ways:
- first, the ifdefs in the code should certainly be changed (this must be done)
- second, optionally, the short text descriptions could be changed (I am not yet convinced about this, since we have many tables using the old nomenclature... to be decided)

About the first point, the part that needs to be changed is especially this one
https://github.com/madgraph5/madgraph4gpu/blob/e2c4c0a3d66b35166bcf89cf73170f05ac872cd1/epochX/cudacpp/gg_tt/src/mgOnGpuConfig.h#L125
```
// C++ SIMD vectorization width (this will be used to set neppV)
#ifdef __CUDACC__ // CUDA implementation has no SIMD
#undef MGONGPU_CPPSIMD
#elif defined __AVX512VL__ && defined MGONGPU_PVW512 // C++ "512z" AVX512 with 512 width (512-bit ie 64-byte): 8 (DOUBLE) or 16 (FLOAT)
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 8
#else
#define MGONGPU_CPPSIMD 16
#endif
#elif defined __AVX512VL__ // C++ "512y" AVX512 with 256 width (256-bit ie 32-byte): 4 (DOUBLE) or 8 (FLOAT) [gcc DEFAULT]
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 4
#else
#define MGONGPU_CPPSIMD 8
#endif
#elif defined __AVX2__ // C++ "avx2" AVX2 (256-bit ie 32-byte): 4 (DOUBLE) or 8 (FLOAT) [clang DEFAULT]
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 4
#else
#define MGONGPU_CPPSIMD 8
#endif
#elif defined __SSE4_2__ // C++ "sse4" SSE4.2 (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [Power9 and ARM default]
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 2
#else
#define MGONGPU_CPPSIMD 4
#endif
#else // C++ "none" i.e. no SIMD
#undef MGONGPU_CPPSIMD
#endif
```
and correspondingly this one
https://github.com/madgraph5/madgraph4gpu/blob/e2c4c0a3d66b35166bcf89cf73170f05ac872cd1/epochX/cudacpp/gg_tt/SubProcesses/Makefile#L221
```
# Set the build flags appropriate to each AVX choice (example: "make AVX=none")
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
$(info AVX=$(AVX))
ifeq ($(UNAME_P),ppc64le)
  ifeq ($(AVX),sse4)
    override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers)
  else ifneq ($(AVX),none)
    $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment)
  endif
else ifeq ($(UNAME_P),arm)
  ifeq ($(AVX),sse4)
    override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers)
  else ifneq ($(AVX),none)
    $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment)
  endif
else
  ifeq ($(AVX),sse4)
    override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers)
  else ifeq ($(AVX),avx2)
    override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang]
  else ifeq ($(AVX),512y)
    override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc]
  else ifeq ($(AVX),512z)
    override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers)
  else ifneq ($(AVX),none)
    $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported)
  endif
endif
# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations?
CXXFLAGS+= $(AVXFLAGS)
```

One possibility could be to use defines like
- for 'none', -DMGONGPU_SIMDNONE
- for 'sse4', -DMGONGPU_SIMD128
- for 'avx2' and '512y', -DMGONGPU_SIMD256 (relying on `__AVX512VL__` to decide whether this is 'avx2' or '512y')
- for '512z', -DMGONGPU_SIMD512
Note that '512y' is a peculiar case: it uses 256-bit registers but also some AVX512 symbols, and it is clearly an Intel-specific thing.

For the tag names, I would be inclined to keep 'none', 'sse4', etc., precisely because things like '512y' are very difficult to describe in terms of register width (otherwise it would be '256plus' or something similar...).

To be discussed. Not too urgent, anyway.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Change the naming convention for SIMD defines and modes #426

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Change the naming convention for SIMD defines and modes #426

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions