Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
87cfde3
diagonal port to oneapi. its tests pass except *LargeDim* and *GFOR*
Oct 5, 2022
eb31c7f
diff port to oneapi. its tests pass except *LargeDim* and *GFOR* (#3304)
pv-pterab-s Oct 7, 2022
9b4642e
Fix several warnings in oneAPI backend
umar456 Oct 7, 2022
227b1b1
Add support for oneAPI to the unified backend
umar456 Oct 7, 2022
a976c07
Use span instead of vector in getKernel APIs
umar456 Oct 12, 2022
416bb5b
Fix the way we encode backendId for unified backend
umar456 Oct 17, 2022
0bb2f7d
Remove extra print from the memcpy kernel in oneAPI
umar456 Oct 17, 2022
9f0829b
Fix backend_index and NUM_BACKENDS constants in unified
umar456 Oct 20, 2022
3b9b820
histogram ported to oneapi. (#3305)
pv-pterab-s Oct 21, 2022
0f9a29b
Add driver minimums for CUDA 11.8 toolkit
umar456 Oct 21, 2022
f79efb9
adds shared memory based reduction to oneapi backend
syurkevi Oct 25, 2022
a4c0220
adds mean kernel
syurkevi Nov 1, 2022
bb9edfd
adds where kernel, scan_first dependency
syurkevi Nov 3, 2022
9469aee
adds scan_dim kernels
syurkevi Nov 4, 2022
16f2244
corrects accessor types
syurkevi Nov 4, 2022
013b196
Fix errors on Linux builds for reduce, scan, and where
umar456 Nov 7, 2022
3775fd7
approx1 port to oneapi. tests out aside from Subs, JIT, Memory
Oct 8, 2022
5558341
approx2 port to oneapi. tests out aside from Subs, JIT, Memory
Oct 10, 2022
1157453
format
Oct 10, 2022
3fc5bd9
split oneapi approx into approx1 and approx2 separate sources
Oct 11, 2022
af3ef61
extract interp functor from approx1/approx2 for reuse in other kernels
syurkevi Nov 8, 2022
51a4f69
bilateral port to oneapi. tests pass except GFOR b/c of missing JIT
Oct 11, 2022
46735cd
fix: interp.hpp missing af/constants.h
Nov 11, 2022
8d4f680
formatting
Nov 11, 2022
dbc33fc
Fix documentation for af_clamp
umar456 Nov 14, 2022
af95a35
Avoid installing system forge when AF_INSTALL_STANDALONE not set
umar456 Nov 14, 2022
35a88d9
Fix ireduce failure in clang 14 due to b8 RNG optimization
umar456 Nov 18, 2022
d8900ea
Fix b8 RNG indexing so that the entire range of ctr is used
umar456 Nov 18, 2022
84046ca
Support 64bit hamming distance (#3314)
ktdq Nov 20, 2022
5a11efe
Fixes local issue with to_string. Refactor out hash functions
GuillaumeSchmid Sep 19, 2022
d352e69
Add compilers to GitHub actions matrix. Update Ubuntu versions
umar456 Nov 19, 2022
96d858a
Add support for fast math compiler flags when building ArrayFire
umar456 Nov 22, 2022
58e39e8
Convert vector to array in addInterpEnumOptions. Fix clang warnings
umar456 Nov 22, 2022
d17c212
Refactor GitHub workflows
umar456 Nov 22, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 0 additions & 38 deletions .github/workflows/clang-format-lint.yml

This file was deleted.

44 changes: 0 additions & 44 deletions .github/workflows/docs_build.yml

This file was deleted.

77 changes: 76 additions & 1 deletion .github/workflows/unix_cpu_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,58 @@ on:
name: ci

jobs:
clang-format:
name: Clang Format Lint
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@master

- name: Check Sources
uses: DoozyX/clang-format-lint-action@v0.14
with:
source: './src ./test ./examples'
extensions: 'h,cpp,hpp'
clangFormatVersion: 14

documentation:
name: Documentation
runs-on: ubuntu-18.04
env:
DOXYGEN_VER: 1.8.18
steps:
- name: Checkout Repository
uses: actions/checkout@master

- name: Install Doxygen
run: |
wget --quiet https://sourceforge.net/projects/doxygen/files/rel-${DOXYGEN_VER}/doxygen-${DOXYGEN_VER}.linux.bin.tar.gz
mkdir doxygen
tar -xf doxygen-${DOXYGEN_VER}.linux.bin.tar.gz -C doxygen --strip 1

- name: Install Boost
run: |
sudo add-apt-repository ppa:mhier/libboost-latest
sudo apt-get -qq update
sudo apt-get install -y libboost1.74-dev

- name: Configure
run: |
mkdir build && cd build && unset VCPKG_ROOT
cmake -DAF_BUILD_CPU:BOOL=OFF -DAF_BUILD_CUDA:BOOL=OFF \
-DAF_BUILD_OPENCL:BOOL=OFF -DAF_BUILD_UNIFIED:BOOL=OFF \
-DAF_BUILD_EXAMPLES:BOOL=OFF -DBUILD_TESTING:BOOL=OFF \
-DDOXYGEN_EXECUTABLE:FILEPATH=${GITHUB_WORKSPACE}/doxygen/bin/doxygen ..

- name: Build
run: |
cd ${GITHUB_WORKSPACE}/build
cmake --build . --target docs

build_cpu:
name: CPU
runs-on: ${{ matrix.os }}
needs: [clang-format, documentation]
env:
NINJA_VER: 1.10.2
CMAKE_VER: 3.10.2
Expand All @@ -20,11 +69,16 @@ jobs:
matrix:
blas_backend: [Atlas, MKL, OpenBLAS]
os: [ubuntu-18.04, ubuntu-20.04, macos-latest]
compiler: [gcc, clang, icx]
exclude:
- os: macos-latest
blas_backend: Atlas
- os: macos-latest
blas_backend: MKL
- blas_backend: Atlas
compiler: icx
- blas_backend: OpenBLAS
compiler: icx
steps:
- name: Checkout Repository
uses: actions/checkout@master
Expand All @@ -43,6 +97,7 @@ jobs:
if: matrix.os != 'macos-latest'
env:
OS_NAME: ${{ matrix.os }}
CC: ${{ matrix.compiler }}
run: |
cmake_suffix=$(if [ $OS_NAME == 'macos-latest' ]; then echo "Darwin-x86_64"; else echo "Linux-x86_64"; fi)
cmake_url=$(echo "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VER}/cmake-${CMAKE_VER}-${cmake_suffix}.tar.gz")
Expand All @@ -54,6 +109,17 @@ jobs:
cmake_osx_dir=$(echo "${cmake_install_dir}/CMake.app/Contents/bin")
cmake_dir=$(if [ $OS_NAME == 'macos-latest' ]; then echo "${cmake_osx_dir}"; else echo "${cmake_lnx_dir}"; fi)
echo "CMAKE_PROGRAM=$(pwd)/${cmake_dir}/cmake" >> $GITHUB_ENV
case "$CC" in
'gcc')
echo "CXX=g++" >> $GITHUB_ENV
;;
'clang')
echo "CXX=clang++" >> $GITHUB_ENV
;;
'icx')
echo "CXX=icpx" >> $GITHUB_ENV
;;
esac

- name: Install Dependencies for Macos
if: matrix.os == 'macos-latest'
Expand All @@ -62,7 +128,7 @@ jobs:
echo "CMAKE_PROGRAM=cmake" >> $GITHUB_ENV

- name: Install Common Dependencies for Ubuntu
if: matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-18.04'
if: matrix.os == 'ubuntu-18.04' || matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-22.04'
run: |
sudo add-apt-repository ppa:mhier/libboost-latest
sudo apt-get -qq update
Expand All @@ -78,12 +144,15 @@ jobs:

- name: Install MKL for Ubuntu
if: matrix.os != 'macos-latest' && matrix.blas_backend == 'MKL'
env:
CC: ${{ matrix.compiler }}
run: |
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
sudo sh -c 'echo deb https://apt.repos.intel.com/oneapi all main > /etc/apt/sources.list.d/oneAPI.list'
sudo apt-get -qq update
sudo apt-get install -y intel-oneapi-mkl-devel
if [ "$CC" == 'icx' ]; then sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp; fi
echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> ${GITHUB_ENV}

- name: Install OpenBLAS for Ubuntu
Expand All @@ -94,6 +163,8 @@ jobs:
env:
USE_MKL: ${{ matrix.blas_backend == 'MKL' }}
BLAS_BACKEND: ${{ matrix.blas_backend }}
CC: ${{ matrix.compiler }}
OS_NAME: ${{ matrix.os }}
run: |
ref=$(echo ${GITHUB_REF} | awk '/refs\/pull\/[0-9]+\/merge/{print $0}')
prnum=$(echo $ref | awk '{split($0, a, "/"); print a[3]}')
Expand All @@ -103,6 +174,7 @@ jobs:
backend=$(if [ "$USE_MKL" == 1 ]; then echo "Intel-MKL"; else echo "FFTW/LAPACK/BLAS"; fi)
buildname="$buildname-cpu-$BLAS_BACKEND"
cmake_rpath=$(if [ $OS_NAME == 'macos-latest' ]; then echo "-DCMAKE_INSTALL_RPATH=/opt/arrayfire/lib"; fi)
if [ "$CC" == 'icx' ]; then source /opt/intel/oneapi/setvars.sh intel64; fi
mkdir build && cd build && unset VCPKG_ROOT
${CMAKE_PROGRAM} -G Ninja \
-DCMAKE_MAKE_PROGRAM:FILEPATH=${GITHUB_WORKSPACE}/ninja \
Expand All @@ -115,6 +187,9 @@ jobs:
echo "CTEST_DASHBOARD=${dashboard}" >> $GITHUB_ENV

- name: Build and Test
env:
CC: ${{ matrix.compiler }}
run: |
cd ${GITHUB_WORKSPACE}/build
if [ "$CC" == 'icx' ]; then source /opt/intel/oneapi/setvars.sh intel64; fi
ctest -D Experimental --track ${CTEST_DASHBOARD} -T Test -T Submit -R cpu -j2
10 changes: 6 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ option(AF_WITH_STATIC_MKL "Link against static Intel MKL libraries" OFF)
option(AF_WITH_STATIC_CUDA_NUMERIC_LIBS "Link libafcuda with static numeric libraries(cublas, cufft, etc.)" OFF)
option(AF_WITH_SPDLOG_HEADER_ONLY "Build ArrayFire with header only version of spdlog" OFF)
option(AF_WITH_FMT_HEADER_ONLY "Build ArrayFire with header only version of fmt" OFF)
option(AF_WITH_FAST_MATH "Use lower precision but high performance numeric optimizations" OFF)

if(AF_WITH_STATIC_CUDA_NUMERIC_LIBS)
option(AF_WITH_PRUNE_STATIC_CUDA_NUMERIC_LIBS "Prune CUDA static libraries to reduce binary size.(WARNING: May break some libs on older CUDA toolkits for some compute arch)" OFF)
Expand Down Expand Up @@ -317,14 +318,15 @@ if(CMAKE_CROSSCOMPILING)
"directory and build the bin2cpp target.")
endif()
else()
add_executable(bin2cpp ${ArrayFire_SOURCE_DIR}/CMakeModules/bin2cpp.cpp
${ArrayFire_SOURCE_DIR}/src/backend/common/util.cpp)
add_executable(bin2cpp CMakeModules/bin2cpp.cpp
src/backend/common/deterministicHash.cpp
src/backend/common/deterministicHash.hpp
src/backend/common/Source.hpp)
set_target_properties(bin2cpp
PROPERTIES
CXX_STANDARD 17)
target_link_libraries(bin2cpp PRIVATE nonstd::span-lite)

# NOSPDLOG is used to remove the spdlog dependency from bin2cpp
target_compile_definitions(bin2cpp PRIVATE NOSPDLOG)
if(WIN32)
target_compile_definitions(bin2cpp PRIVATE OS_WIN)
elseif(APPLE)
Expand Down
3 changes: 2 additions & 1 deletion CMakeModules/AFconfigure_forge_dep.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ else(AF_BUILD_FORGE)

if(TARGET Forge::forge)
get_target_property(fg_lib_type Forge::forge TYPE)
if(NOT ${fg_lib_type} STREQUAL "STATIC_LIBRARY")
if(NOT ${fg_lib_type} STREQUAL "STATIC_LIBRARY" AND
AF_INSTALL_STANDALONE)
install(FILES
$<TARGET_FILE:Forge::forge>
$<$<PLATFORM_ID:Linux>:$<TARGET_SONAME_FILE:Forge::forge>>
Expand Down
27 changes: 26 additions & 1 deletion CMakeModules/InternalUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ if(WIN32)
check_cxx_compiler_flag(/permissive- cxx_compliance)
endif()

check_cxx_compiler_flag(-ffast-math has_cxx_fast_math)
check_cxx_compiler_flag("-fp-model fast" has_cxx_fp_model)
check_cxx_compiler_flag(-fno-errno-math has_cxx_no_errno_math)
check_cxx_compiler_flag(-fno-trapping-math has_cxx_no_trapping_math)
check_cxx_compiler_flag(-fno-signed-zeros has_cxx_no_signed_zeros)
check_cxx_compiler_flag(-mno-ieee-fp has_cxx_no_ieee_fp)
check_cxx_compiler_flag(-Wno-unqualified-std-cast-call has_cxx_unqualified_std_cast_call)
check_cxx_compiler_flag(-Werror=reorder-ctor has_cxx_error_reorder_ctor)

function(arrayfire_set_default_cxx_flags target)
target_compile_options(${target}
PRIVATE
Expand All @@ -39,6 +48,7 @@ function(arrayfire_set_default_cxx_flags target)
/wd4668
/wd4710
/wd4505
/we5038
/bigobj
/EHsc
# MSVC incorrectly sets the cplusplus to 199711L even if the compiler supports
Expand All @@ -51,7 +61,21 @@ function(arrayfire_set_default_cxx_flags target)
# ignored attribute warnings in the OpenCL
# headers
$<$<BOOL:${has_ignored_attributes_flag}>:-Wno-ignored-attributes>
$<$<BOOL:${has_all_warnings_flag}>:-Wall>>
$<$<BOOL:${has_all_warnings_flag}>:-Wall>
$<$<BOOL:${has_cxx_unqualified_std_cast_call}>:-Wno-unqualified-std-cast-call>
$<$<BOOL:${has_cxx_error_reorder_ctor}>:-Werror=reorder-ctor>

$<$<BOOL:${AF_WITH_FAST_MATH}>:
$<$<BOOL:${has_cxx_fast_math}>:-ffast-math>
$<$<BOOL:${has_cxx_no_errno_math}>:-fno-errno-math>
$<$<BOOL:${has_cxx_no_trapping_math}>:-fno-trapping-math>
$<$<BOOL:${has_cxx_no_signed_zeros}>:-fno-signed-zeros>
$<$<BOOL:${has_cxx_no_ieee_fp}>:-mno-ieee-fp>
>

$<$<NOT:$<BOOL:${AF_WITH_FAST_MATH}>>:
$<$<BOOL:${has_cxx_fp_model}>:-fp-model precise>>
>
)

target_compile_definitions(${target}
Expand All @@ -65,6 +89,7 @@ function(arrayfire_set_default_cxx_flags target)

$<$<BOOL:${AF_WITH_LOGGING}>: AF_WITH_LOGGING>
$<$<BOOL:${AF_CACHE_KERNELS_TO_DISK}>: AF_CACHE_KERNELS_TO_DISK>
$<$<BOOL:${AF_WITH_FAST_MATH}>: AF_WITH_FAST_MATH>
)
endfunction()

Expand Down
4 changes: 2 additions & 2 deletions CMakeModules/bin2cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include <utility>
#include <vector>

#include <common/util.hpp>
#include <common/deterministicHash.hpp>

using namespace std;
using std::cout;
Expand Down Expand Up @@ -275,7 +275,7 @@ int main(int argc, const char *const *const argv) {

cout << "#pragma once\n";
cout << "#include <cstddef>\n"; // defines size_t
cout << "#include <common/util.hpp>\n"; // defines common::Source
cout << "#include <common/Source.hpp>\n"; // defines common::Source

int ns_cnt = 0;
int level = 0;
Expand Down
7 changes: 6 additions & 1 deletion docs/details/arith.dox
Original file line number Diff line number Diff line change
Expand Up @@ -190,14 +190,19 @@ Bitwise xor operation of two inputs
Minimum of two inputs.



\defgroup arith_func_max max

\ingroup numeric_mat

Maximum of two inputs.


\defgroup arith_func_clamp clamp

\ingroup numeric_mat

Limits the values of the input array to the range between lo and hi.


\defgroup arith_func_rem rem

Expand Down
4 changes: 2 additions & 2 deletions examples/benchmarks/pi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ static double pi_device() {
static double pi_host() {
int count = 0;
for (int i = 0; i < samples; ++i) {
float x = float(rand()) / RAND_MAX;
float y = float(rand()) / RAND_MAX;
float x = float(rand()) / float(RAND_MAX);
float y = float(rand()) / float(RAND_MAX);
if (sqrt(x * x + y * y) < 1) count++;
}
return 4.0 * count / samples;
Expand Down
Loading