Skip to content

Commit

Permalink
Add various improvements to compile options. Including Automatic AVX …
Browse files Browse the repository at this point in the history
…detection for MSVC. (#91)

* Add various improvements to compile options. Adds automatic AVX detection for MSVC.
  • Loading branch information
zpzim authored Feb 13, 2022
1 parent a09aed8 commit 98b089b
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 1 deletion.
24 changes: 23 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)

# Stock CMake helper modules (language and compiler-flag probing).
include(CheckLanguage)
include(CheckCXXCompilerFlag)

# Let find_package() locate the find modules shipped in this project's cmake/ directory.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# ----------------------------------------------------------------------------------------
# ===== Project Setup =====
project(SCAMPmain LANGUAGES CXX)
Expand Down Expand Up @@ -120,11 +121,17 @@ if (CMAKE_CUDA_COMPILER)
endif()
endif()

CHECK_CXX_COMPILER_FLAG("-fPIC" COMPILER_OPT_PIC_SUPPORTED)
# AVX Config for MSVC
# Loads the project's FindAVX module; that module only does work when MSVC is set.
find_package(AVX)

# Probe which flags the C++ compiler accepts. Each result is stored in the
# COMPILER_OPT_*_SUPPORTED variable named in the call.

# Host-architecture tuning (GCC/Clang spelling, then the two ICC spellings).
check_cxx_compiler_flag("-march=native" COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
check_cxx_compiler_flag("-xHost" COMPILER_OPT_XHOST_SUPPORTED)
check_cxx_compiler_flag("/QxHost" COMPILER_OPT_QXHOST_SUPPORTED)

# General optimisation.
check_cxx_compiler_flag("-O3" COMPILER_OPT_O3_SUPPORTED)
check_cxx_compiler_flag("-funroll-loops" COMPILER_OPT_UNROLL_LOOPS_SUPPORTED)

# Floating-point contraction (GCC/Clang spelling, then the MSVC-style spelling).
check_cxx_compiler_flag("-ffp-contract=fast" COMPILER_OPT_FPCONTRACT_FAST_SUPPORTED)
check_cxx_compiler_flag("/fp:contract" COMPILER_OPT_FPCONTRACT_SUPPORTED)

# Position-independent code.
check_cxx_compiler_flag("-fPIC" COMPILER_OPT_PIC_SUPPORTED)

# Warnings.
check_cxx_compiler_flag("-Wall" COMPILER_OPT_WARN_ALL_SUPPORTED)
check_cxx_compiler_flag("-Wno-sign-compare" COMPILER_OPT_NO_WARN_SIGN_COMPARE_SUPPORTED)

Expand All @@ -133,10 +140,21 @@ if (COMPILER_OPT_PIC_SUPPORTED)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fPIC")
endif()


# Release-only flags. Each one is appended to CMAKE_CXX_FLAGS_RELEASE only when
# the matching COMPILER_OPT_*_SUPPORTED variable is true.
# NOTE(review): -march=native / -xHost / /QxHost tune the code for the CPU of the
# build machine, so the resulting binaries may not run on older processors.

# GCC/Clang arch-specific optimizations
if(COMPILER_OPT_ARCH_NATIVE_SUPPORTED)
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native")
endif()

# ICC arch-specific optimizations
if(COMPILER_OPT_XHOST_SUPPORTED)
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -xHost")
endif()

if(COMPILER_OPT_QXHOST_SUPPORTED)
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " /QxHost")
endif()

if(COMPILER_OPT_O3_SUPPORTED)
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " -O3")
endif()
Expand All @@ -149,6 +167,10 @@ if (COMPILER_OPT_FPCONTRACT_FAST_SUPPORTED)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -ffp-contract=fast")
endif()

# MSVC-style spelling of floating-point contraction; Release builds only.
if(COMPILER_OPT_FPCONTRACT_SUPPORTED)
  string(APPEND CMAKE_CXX_FLAGS_RELEASE " /fp:contract")
endif()

if (COMPILER_OPT_WARN_ALL_SUPPORTED)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall")
Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Include every file named CMakeLists.txt, wherever it sits in the tree.
global-include CMakeLists.txt

# Directory trees included wholesale.
graft src
# cmake/ holds FindAVX.cmake, which the top-level CMakeLists.txt loads via find_package(AVX).
graft cmake
graft third_party/gflags
graft third_party/pybind11
graft third_party/eigen
68 changes: 68 additions & 0 deletions cmake/FindAVX.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Check for the presence of AVX and figure out the flags to use for it.
#
# This module only acts when MSVC is set. In that case it:
#   * compiles and runs two small test programs (one using AVX intrinsics, one
#     using AVX2 intrinsics) and caches the outcomes in HAVE_AVX_EXTENSIONS and
#     HAVE_AVX2_EXTENSIONS;
#   * sets AVX_FLAGS to " /arch:AVX2", " /arch:AVX" or leaves it empty,
#     preferring AVX2 over AVX;
#   * appends AVX_FLAGS to CMAKE_CXX_FLAGS_DEBUG, CMAKE_CXX_FLAGS_RELWITHDEBINFO
#     and CMAKE_CXX_FLAGS_RELEASE.
#
# The MSVC_VERSION thresholds gate the /arch switches: 1600 (Visual Studio 2010)
# for /arch:AVX and 1800 (Visual Studio 2013) for /arch:AVX2.
if (MSVC)
  include(CheckCXXSourceRuns)
  include(CMakePushCheckState)

  set(AVX_FLAGS)

  # A find module runs in the variable scope of the find_package() caller.
  # Without save/restore, the CMAKE_REQUIRED_FLAGS value set for the probes
  # below would remain in effect after this module returns and be applied to
  # every later check_* call in the project, and the caller's own value would
  # be lost.
  cmake_push_check_state()
  set(CMAKE_REQUIRED_FLAGS)

  # Check AVX
  if(NOT MSVC_VERSION LESS 1600)
    set(CMAKE_REQUIRED_FLAGS "/arch:AVX")
  endif()

  check_cxx_source_runs("
#include <immintrin.h>
int main()
{
__m256 a, b, c;
const float src[8] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f };
float dst[8];
a = _mm256_loadu_ps( src );
b = _mm256_loadu_ps( src );
c = _mm256_add_ps( a, b );
_mm256_storeu_ps( dst, c );
for( int i = 0; i < 8; i++ ){
if( ( src[i] + src[i] ) != dst[i] ){
return -1;
}
}
return 0;
}"
    HAVE_AVX_EXTENSIONS)

  # Check AVX2
  if(NOT MSVC_VERSION LESS 1800)
    set(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
  endif()

  check_cxx_source_runs("
#include <immintrin.h>
int main()
{
__m256i a, b, c;
const int src[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
int dst[8];
a = _mm256_loadu_si256( (__m256i*)src );
b = _mm256_loadu_si256( (__m256i*)src );
c = _mm256_add_epi32( a, b );
_mm256_storeu_si256( (__m256i*)dst, c );
for( int i = 0; i < 8; i++ ){
if( ( src[i] + src[i] ) != dst[i] ){
return -1;
}
}
return 0;
}"
    HAVE_AVX2_EXTENSIONS)

  # Probing is finished: give the caller back its original CMAKE_REQUIRED_* state.
  cmake_pop_check_state()

  # Set Flags
  # AVX2 wins over AVX when both probes succeeded.
  if(HAVE_AVX2_EXTENSIONS AND NOT MSVC_VERSION LESS 1800)
    message(STATUS "Detected AVX2 support. Using AVX2 Extensions.")
    set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
  elseif(HAVE_AVX_EXTENSIONS AND NOT MSVC_VERSION LESS 1600)
    message(STATUS "Detected AVX support. Using AVX Extensions.")
    set(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
  endif()

  # Apply the selected switch (possibly empty) to the three configurations below.
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${AVX_FLAGS}")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${AVX_FLAGS}")
  set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${AVX_FLAGS}")
endif()

1 comment on commit 98b089b

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.50.

Benchmark suite Current: 98b089b Previous: a09aed8 Ratio
BM_MATRIX_SELF_JOIN/1/131072 83124511855.00003 ns/iter 50160445186.99999 ns/iter 1.66

This comment was automatically generated by workflow using github-action-benchmark.

CC: @zpzim

Please sign in to comment.