Skip to content
This repository has been archived by the owner on May 8, 2021. It is now read-only.

Commit

Permalink
Merge pull request clMathLibraries#210 from TimmyLiu/master
Browse files Browse the repository at this point in the history
merge develop branch into master branch. bump the version number to 2.10
  • Loading branch information
TimmyLiu committed Jan 5, 2016
2 parents 0fc3d3f + fffd478 commit d16f7b3
Show file tree
Hide file tree
Showing 100 changed files with 1,540 additions and 411 deletions.
49 changes: 38 additions & 11 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ language: cpp
# sudo: false instructs travis to build our project in a docker VM (faster)
# Cannot yet install fglrx packages with 'sudo: false'
sudo: required # false
dist: trusty

# os: expands the build matrix to include multiple os's
# disable linux, as we get sporadic failures on building boost, needs investigation
Expand Down Expand Up @@ -43,6 +44,7 @@ addons:
# boost-latest contains boost v1.55
- boost-latest
packages:
- gfortran
# g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
# - g++-4.8
# - clang-3.6
Expand All @@ -66,6 +68,8 @@ addons:
env:
global:
- CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release
- OPENCL_REGISTRY=https://www.khronos.org/registry/cl
- OPENCL_ROOT=${TRAVIS_BUILD_DIR}/bin/opencl

# The following filters our build matrix; we are interested in linux-gcc & osx-clang
matrix:
Expand All @@ -77,13 +81,13 @@ matrix:

before_install:
# Remove the following linux clause when fglrx can be installed with sudo: false
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
sudo apt-get update -qq &&
sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
fi
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
fi
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
# sudo apt-get update -qq &&
# sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
# fi
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
# export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
# fi
- if [ ${TRAVIS_OS_NAME} == "osx" ]; then
brew update;
brew outdated boost || brew upgrade boost;
Expand All @@ -97,10 +101,33 @@ before_install:
install:
# 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
# Remove when the travis VM upgrades to 'trusty' or beyond
#- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
# mkdir -p ${OPENCL_ROOT}/include/CL;
# pushd ${OPENCL_ROOT}/include/CL;
# wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
# popd;
# fi
# The following linux logic is necessary because of Travis's move to the GCE platform, which does not
# currently contain packages for fglrx: https://github.com/travis-ci/travis-ci/issues/5221
# We build our own linkable .so file
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
mkdir -p ${OPENCL_ROOT}/include/CL;
pushd ${OPENCL_ROOT}/include/CL;
wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
mkdir -p ${OPENCL_ROOT};
pushd ${OPENCL_ROOT};
wget ${OPENCL_REGISTRY}/specs/opencl-icd-1.2.11.0.tgz;
tar -xf opencl-icd-1.2.11.0.tgz;
mv ./icd/* .;
mkdir -p inc/CL;
pushd inc/CL;
wget -r -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/2.1/cl.hpp;
popd;
mkdir -p lib;
pushd lib;
cmake -G "Unix Makefiles" ..;
make;
cp ../bin/libOpenCL.so .;
popd;
mv inc/ include/;
popd;
fi
# osx image does not contain cl.hpp file; download from Khronos
Expand All @@ -114,7 +141,7 @@ install:
before_script:
- mkdir -p ${CLBLAS_ROOT}
- pushd ${CLBLAS_ROOT}
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=OFF -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src

# use script: to execute build steps
script:
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ message( STATUS "You have confirmed OpenCL ${OPENCL_VERSION} is supported in you
if ( APPLE )
set(CORR_TEST_WITH_ACML OFF CACHE BOOL "Use ACML library in correctness tests")
else ( )
message(STATUS "CORR_TEST_WITH_ACML set to ON")
set(CORR_TEST_WITH_ACML ON CACHE BOOL "Use ACML library in correctness tests")
message(STATUS "CORR_TEST_WITH_ACML set to OFF. Try link with libblas.so")
set(CORR_TEST_WITH_ACML OFF CACHE BOOL "Use ACML library in correctness tests")
endif( )

if( CMAKE_GENERATOR MATCHES "NMake" )
Expand All @@ -95,8 +95,8 @@ if( CMAKE_GENERATOR MATCHES "NMake" )
endif( )
endif( )

# If we are on linux, and we wish to link with the netlib BLAS implementation, we need to have a valid fortran compiler
if( NOT CORR_TEST_WITH_ACML AND NOT WIN32 AND NOT APPLE )
# If we are on linux, and we wish to link with the netlib BLAS implementation when BUILD_TEST is ON, we need to have a valid fortran compiler
if(BUILD_TEST AND NOT CORR_TEST_WITH_ACML AND NOT WIN32 AND NOT APPLE)
project(clBLAS Fortran C CXX )
else( )
project(clBLAS C CXX)
Expand All @@ -108,7 +108,7 @@ if( NOT DEFINED clBLAS_VERSION_MAJOR )
endif( )

if( NOT DEFINED clBLAS_VERSION_MINOR )
set( clBLAS_VERSION_MINOR 8 )
set( clBLAS_VERSION_MINOR 10 )
endif( )

if( NOT DEFINED clBLAS_VERSION_PATCH )
Expand Down
5 changes: 3 additions & 2 deletions src/FindNetlib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ endif( )

# The library name available from Netlib has different names for 64bit and 32bit libs
if( LIB64 )
set( Netlib_BLAS_LIBNAME libblas )
set( Netlib_BLAS_LIBNAME blas )
# set( Netlib_BLAS_LIBNAME BLAS ) Even though the download is named BLAS, the linker expects the .dll to be called libblas.dll
else( )
set( Netlib_BLAS_LIBNAME libblas )
set( Netlib_BLAS_LIBNAME blas )
endif( )

list( FIND Netlib_FIND_COMPONENTS BLAS contains_BLAS )
Expand All @@ -91,6 +91,7 @@ if( NOT contains_BLAS EQUAL -1 )
PATHS
/usr/lib
/usr/local/lib
/usr/lib/libblas
DOC "Netlib dynamic library path"
PATH_SUFFIXES lib
)
Expand Down
11 changes: 9 additions & 2 deletions src/library/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# limitations under the License.
# ########################################################################

find_package(PythonInterp REQUIRED)



Expand Down Expand Up @@ -43,6 +44,7 @@ set(AUTOGEMM_HEADERS
)

set(AUTOGEMM_SRC
${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/UserGemmKernelSources/UserGemmClKernels.cc
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmClKernels.cpp
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBuildOptionsBinary.cpp
${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelBinaries.cpp
Expand Down Expand Up @@ -90,6 +92,10 @@ option( PRECOMPILE_GEMM_TRANS_CN "AutoGemm: pre-compile CN transpose cases" OFF)
option( PRECOMPILE_GEMM_TRANS_CT "AutoGemm: pre-compile CT transpose cases" OFF)
option( PRECOMPILE_GEMM_TRANS_CC "AutoGemm: pre-compile CC transpose cases" OFF)

# Target device for AutoGemm's kernel selection logic. The value is forwarded
# to AutoGemm.py via its --architecture argument; defaults to "Hawaii".
set( AUTOGEMM_ARCHITECTURE "Hawaii" CACHE STRING "AutoGemm: device for kernel selection logic" )
# Advertise the accepted values so cmake-gui / ccmake present a drop-down.
set_property( CACHE AUTOGEMM_ARCHITECTURE PROPERTY STRINGS "Hawaii" "Fiji" )



# opencl compiler version
#set( PRECOMPILE_GEMM_OPENCL_VERSION "2.0" CACHE STRING "OpenCL compiler version supported by device driver." )
Expand Down Expand Up @@ -197,7 +203,7 @@ set( AUTOGEMM_PRECOMPILE_HEADER_SRC ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/Ke
set( AUTOGEMM_PRECOMPILE_HEADER_OUT ${CMAKE_BINARY_DIR}/include/AutoGemmIncludes/AutoGemmKernelsToPreCompile.h )
add_custom_command(
OUTPUT ${AUTOGEMM_PRECOMPILE_HEADER_OUT}
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/KernelsToPreCompile.py ${AGPC_ARGS}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/KernelsToPreCompile.py ${AGPC_ARGS}
DEPENDS ${AUTOGEMM_PRECOMPILE_HEADER_SRC}
)

Expand Down Expand Up @@ -238,7 +244,7 @@ endif()#endif precompile active
################################################################################
add_custom_command(
OUTPUT ${AUTOGEMM_HEADERS} ${AUTOGEMM_SRC}
COMMAND python ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/library/blas/AutoGemm/AutoGemm.py --output-path ${CMAKE_BINARY_DIR}/include --opencl-compiler-version ${OPENCL_VERSION} --architecture ${AUTOGEMM_ARCHITECTURE}
DEPENDS ${AUTOGEMM_SCRIPTS}
)

Expand Down Expand Up @@ -851,6 +857,7 @@ set(CLBLAS_ALL_SOURCES
#${USERGEMM_SRC}
#${USERGEMM_HEADERS}
)
add_definitions(-DOPENCL_VERSION="${OPENCL_VERSION}")
add_library(clBLAS ${CLBLAS_ALL_SOURCES})
add_dependencies(clBLAS GENERATE_CLT)

Expand Down
8 changes: 5 additions & 3 deletions src/library/blas/AutoGemm/AutoGemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import Common
import Includes
import KernelSelection
import AutoGemmParameters
import KernelOpenCL


Expand All @@ -32,16 +33,17 @@
ap = argparse.ArgumentParser(description="AutoGemm")
ap.add_argument("--output-path", dest="output" )
ap.add_argument("--opencl-compiler-version", dest="clCompilerVersion", action="store", choices=["1.1", "1.2", "2.0" ])
ap.add_argument("--architecture", dest="architecture", action="store", choices=["Hawaii", "Fiji" ])
args = ap.parse_args()
if args.output:
Common.setOutputPath(args.output)
else:
print "AutoGemm.py: Warning: No output path specified; default is working directory."
print("AutoGemm.py: Warning: No output path specified; default is working directory.")

print "AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler"
print("AutoGemm.py: using OpenCL " + args.clCompilerVersion + " compiler")
Common.setClCompilerVersion(args.clCompilerVersion)
AutoGemmParameters.setArchitecture(args.architecture)

KernelOpenCL.writeOpenCLKernels()
KernelSelection.writeKernelSelection()
Includes.writeIncludes()

70 changes: 54 additions & 16 deletions src/library/blas/AutoGemm/AutoGemmParameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Tile Parameters for Kernel Selection Data
################################################################################

kernelSelectionData = {
kernelSelectionDataHawaii = {
# [ size, fallback tile, [ valid tiles ] ],
"s":[
[ 4000, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
Expand Down Expand Up @@ -54,17 +54,55 @@
],
}

"""
for testing all micro-tile sizes
[ 128, [ 16, 16, 8, 8], [ [ 16, 16, 8, 8] ] ],
[ 112, [ 16, 16, 7, 7], [ [ 16, 16, 7, 7] ] ],
[ 96, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6] ] ],
[ 80, [ 16, 16, 5, 5], [ [ 16, 16, 5, 5] ] ],
[ 64, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4] ] ],
[ 48, [ 16, 16, 3, 3], [ [ 16, 16, 3, 3] ] ],
[ 32, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2] ] ],
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
"""
# Kernel selection table used when the architecture is set to "Fiji".
# Same row layout as kernelSelectionDataHawaii:
#   [ size, fallback tile, [ valid tiles ] ],
# NOTE(review): the keys "s", "d", "c", "z" look like the precision letters
# passed to getTilesForPrecision, and each tile looks like
# [ workGroupNumRows, workGroupNumCols, microTileNumRows, microTileNumCols ]
# -- confirm against getTilesForPrecision.
kernelSelectionDataFiji = {
"s":[
[ 3072, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 4, 4] ] ],
[ 2240, [ 16, 16, 6, 6], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
[ 1760, [ 16, 16, 4, 4], [ [ 16, 16, 6, 6], [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
[ 1600, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3] ] ],
[ 1056, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
[ 960, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 3, 3], [ 16, 16, 2, 2] ] ],
[ 736, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
[ 528, [ 16, 16, 3, 3], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 432, [ 16, 16, 2, 2], [ [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 320, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
],
"d":[
[ 3200, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 5, 5], [ 16, 16, 2, 2], [ 8, 8, 6, 6 ] ] ],
[ 1632, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ] ] ],
[ 1280, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 5, 5], [ 8, 8, 6, 6 ], [ 16, 16, 1, 1] ] ],
[ 1056, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 672, [ 16, 16, 2, 2], [ [ 16, 16, 1, 1] ] ],
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
],
"c":[
[ 2240, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], ] ],
[ 1440, [ 16, 16, 4, 4], [ [ 16, 16, 4, 4], [ 16, 16, 6, 6], [ 16, 16, 5, 5], [ 16, 16, 2, 2] ] ],
[ 1088, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 3, 3], [ 16, 16, 2, 2], [ 16, 16, 5, 5] ] ],
[ 704, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 5, 5] ] ],
[ 528, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
[ 336, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
],
"z":[
[ 2528, [ 16, 16, 2, 2], [ [ 16, 16, 4, 4], [ 16, 16, 2, 2], [ 16, 16, 3, 3] ] ],
[ 1872, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 3, 3], [ 16, 16, 1, 1] ] ],
[ 1040, [ 16, 16, 2, 2], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 768, [ 16, 16, 1, 1], [ [ 16, 16, 2, 2], [ 16, 16, 1, 1] ] ],
[ 0, [ 16, 16, 1, 1], [ [ 16, 16, 1, 1] ] ],
]
}

kernelSelectionData = kernelSelectionDataHawaii
def setArchitecture(architecture):
  """Point the module-level kernelSelectionData at one device's table.

  architecture -- "Fiji" selects kernelSelectionDataFiji; every other value
                  selects kernelSelectionDataHawaii.
  """
  # Only the name being rebound needs a global declaration; the two
  # per-device tables are merely read.
  global kernelSelectionData

  kernelSelectionData = (
    kernelSelectionDataFiji
    if architecture == "Fiji"
    else kernelSelectionDataHawaii
  )


################################################################################
# Non-Tile Parameters
Expand All @@ -89,20 +127,20 @@ def getTilesForPrecision(precision):
validTiles = sizeData[2]
# add valid tiles
for tileParams in validTiles:
#print tileParams
#print(tileParams)
tile.workGroupNumRows = tileParams[0]
tile.workGroupNumCols = tileParams[1]
tile.microTileNumRows = tileParams[2]
tile.microTileNumCols = tileParams[3]
tile.macroTileNumRows = tile.workGroupNumRows*tile.microTileNumRows
tile.macroTileNumCols = tile.workGroupNumCols*tile.microTileNumCols
#print tile.getName()
#print(tile.getName())
for unroll in unrolls[precision]:
tile.unroll = unroll
if tile.isValid():
tiles.append( copy.copy(tile) )
else:
print tile.getName() + " - SKIPPING - "
print(tile.getName() + " - SKIPPING - ")

# add fallback tile
tile.workGroupNumRows = fallbackTile[0]
Expand All @@ -116,7 +154,7 @@ def getTilesForPrecision(precision):
if tile.isValid():
tiles.append( copy.copy(tile) )
else:
print tile.getName() + " - SKIPPING - "
print(tile.getName() + " - SKIPPING - ")

setTiles = set(tiles)
tiles = list( setTiles )
Expand Down
4 changes: 2 additions & 2 deletions src/library/blas/AutoGemm/Includes.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def writeToFile(self):
# Write Includes
################################################################################
def writeIncludes():
print "AutoGemm.py: Generating include files."
print("AutoGemm.py: Generating include files.")
if not os.path.exists( Common.getIncludePath() ):
os.makedirs( Common.getIncludePath() )

Expand Down Expand Up @@ -490,5 +490,5 @@ def writeIncludes():
if len(sys.argv) == 2:
Common.setOutputPath(sys.argv[1])
else:
print "Warning: No output path specified; default is working directory."
print("Warning: No output path specified; default is working directory.")
writeIncludes()
Loading

0 comments on commit d16f7b3

Please sign in to comment.