diff --git a/.gitmodules b/.gitmodules index 3f0b222a86c6..8011ec12d24b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,9 @@ [submodule "dmlc-core"] - path = dmlc-core + path = 3rdparty/dmlc-core url = https://github.com/dmlc/dmlc-core [submodule "HalideIR"] - path = HalideIR + path = 3rdparty/HalideIR url = https://github.com/dmlc/HalideIR [submodule "dlpack"] - path = dlpack + path = 3rdparty/dlpack url = https://github.com/dmlc/dlpack diff --git a/HalideIR b/3rdparty/HalideIR similarity index 100% rename from HalideIR rename to 3rdparty/HalideIR diff --git a/3rdparty/compiler-rt/builtin_fp16.h b/3rdparty/compiler-rt/builtin_fp16.h new file mode 100644 index 000000000000..1657d2830119 --- /dev/null +++ b/3rdparty/compiler-rt/builtin_fp16.h @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2009-2015 by llvm/compiler-rt contributors + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ + * Copyright (c) 2018 by Contributors + * \file builtin_fp16.h + * \brief Functions for conversion between fp32 and fp16, adapted from compiler-rt. + */ + +#include <cstdint> + +static inline uint32_t __clz(uint32_t x) { + // count leading zeros; returns 32 when x == 0 + int n = 32; + uint32_t y; + + y = x >>16; if (y) { n = n -16; x = y; } + y = x >> 8; if (y) { n = n - 8; x = y; } + y = x >> 4; if (y) { n = n - 4; x = y; } + y = x >> 2; if (y) { n = n - 2; x = y; } + y = x >> 1; if (y) return n - 2; + return n - x; +} + +template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, + typename DST_T, typename DST_REP_T, int DST_SIG_BITS> +static inline DST_T __truncXfYf2__(SRC_T a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcBits = sizeof(SRC_T) * 8; + const int srcExpBits = srcBits - SRC_SIG_BITS - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS; + const SRC_REP_T srcSignificandMask = srcMinNormal - 1; + const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS; + const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits); + const SRC_REP_T srcAbsMask = srcSignMask - 1; + const SRC_REP_T roundMask = (SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS)) - 1; + const SRC_REP_T halfway = SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS - 1); + const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1); + const SRC_REP_T srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(DST_T) * 8; + const int dstExpBits = dstBits - DST_SIG_BITS - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const int underflowExponent = srcExpBias + 1 - dstExpBias; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const SRC_REP_T underflow = (SRC_REP_T)underflowExponent << SRC_SIG_BITS; + const SRC_REP_T overflow = (SRC_REP_T)overflowExponent << SRC_SIG_BITS; + + const DST_REP_T dstQNaN = DST_REP_T(1) << (DST_SIG_BITS - 1); + const 
DST_REP_T dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value + const union { SRC_T f; SRC_REP_T i; } src_rep = {.f = a}; + const SRC_REP_T aRep = src_rep.i; + const SRC_REP_T aAbs = aRep & srcAbsMask; + const SRC_REP_T sign = aRep & srcSignMask; + DST_REP_T absResult; + + if (aAbs - underflow < aAbs - overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + absResult = aAbs >> (SRC_SIG_BITS - DST_SIG_BITS); + absResult -= (DST_REP_T)(srcExpBias - dstExpBias) << DST_SIG_BITS; + + const SRC_REP_T roundBits = aAbs & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS; + absResult |= dstQNaN; + absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode; + } + else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS; + } + else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const int aExp = aAbs >> SRC_SIG_BITS; + const int shift = srcExpBias - dstExpBias - aExp + 1; + + const SRC_REP_T significand = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. 
+ if (shift > SRC_SIG_BITS) { + absResult = 0; + } else { + const bool sticky = significand << (srcBits - shift); + SRC_REP_T denormalizedSignificand = significand >> shift | sticky; + absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS); + const SRC_REP_T roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + } + + // Apply the signbit to (DST_T)abs(a). + const DST_REP_T result = absResult | sign >> (srcBits - dstBits); + const union { DST_T f; DST_REP_T i; } dst_rep = {.i = result}; + return dst_rep.f; +} + +template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, + typename DST_T, typename DST_REP_T, int DST_SIG_BITS> +static inline DST_T __extendXfYf2__(SRC_T a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. + const int srcBits = sizeof(SRC_T) * 8; + const int srcExpBits = srcBits - SRC_SIG_BITS - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS; + const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS; + const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits); + const SRC_REP_T srcAbsMask = srcSignMask - 1; + const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1); + const SRC_REP_T srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(DST_T)*8; + const int dstExpBits = dstBits - DST_SIG_BITS - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS; + + // Break a into a sign and representation of the absolute value + const union { SRC_T f; SRC_REP_T i; } src_rep = {.f = a}; + const SRC_REP_T aRep = src_rep.i; + const SRC_REP_T aAbs = aRep & srcAbsMask; + const SRC_REP_T sign = aRep & srcSignMask; + DST_REP_T absResult; + + // If sizeof(SRC_REP_T) < sizeof(int), the 
subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to SRC_REP_T. + if ((SRC_REP_T)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS); + absResult += (DST_REP_T)(dstExpBias - srcExpBias) << DST_SIG_BITS; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS; + absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS); + absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS); + } + else if (aAbs) { + // a is denormal. + // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = __clz(aAbs) - __clz(srcMinNormal); + absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS; + } + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to (DST_T)abs(a). 
+ const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits); + const union { DST_T f; DST_REP_T i; } dst_rep = {.i = result}; + return dst_rep.f; +} diff --git a/dlpack b/3rdparty/dlpack similarity index 100% rename from dlpack rename to 3rdparty/dlpack diff --git a/dmlc-core b/3rdparty/dmlc-core similarity index 100% rename from dmlc-core rename to 3rdparty/dmlc-core diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f03b9f64ab9..5e1d8bcc38fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,8 +50,9 @@ tvm_option(USE_RANDOM "Build with random support" OFF) # include directories include_directories("include") -include_directories("dlpack/include") -include_directories("dmlc-core/include") +include_directories("3rdparty/dlpack/include") +include_directories("3rdparty/dmlc-core/include") +include_directories("3rdparty/compiler-rt") # initial variables set(TVM_LINKER_LIBS "") @@ -87,8 +88,8 @@ else(MSVC) endif(MSVC) # add source group -FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "HalideIR/src/*.cpp" "nnvm/src/*.cc") -FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "HalideIR/src/*.h" +FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "3rdparty/HalideIR/src/*.cpp" "nnvm/src/*.cc") +FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "3rdparty/HalideIR/src/*.h" "nnvm/src/*.h" "nnvm/include/*.h") assign_source_group("Source" ${GROUP_SOURCE}) assign_source_group("Include" ${GROUP_INCLUDE}) @@ -127,7 +128,7 @@ file(GLOB_RECURSE NNVM_COMPILER_SRCS file(GLOB TOPI_SRCS topi/src/*.cc ) -file(GLOB_RECURSE HALIDEIR_SRCS HalideIR/src/*.cpp) +file(GLOB_RECURSE HALIDEIR_SRCS 3rdparty/HalideIR/src/*.cpp) list(APPEND COMPILER_SRCS ${HALIDEIR_SRCS}) file(GLOB RUNTIME_SRCS src/runtime/*.cc) @@ -194,7 +195,7 @@ target_link_libraries(nnvm_compiler tvm) # Related headers target_include_directories( tvm - PUBLIC "HalideIR/src" + PUBLIC "3rdparty/HalideIR/src" PUBLIC "topi/include") target_include_directories( tvm_topi @@ -244,12 +245,12 @@ if (INSTALL_DEV) PATTERN 
"*.h" ) install( - DIRECTORY "HalideIR/src/." DESTINATION "include/HalideIR" + DIRECTORY "3rdparty/HalideIR/src/." DESTINATION "include/HalideIR" FILES_MATCHING PATTERN "*.h" ) install( - DIRECTORY "dlpack/include/." DESTINATION "include" + DIRECTORY "3rdparty/dlpack/include/." DESTINATION "include" FILES_MATCHING PATTERN "*.h" ) diff --git a/Makefile b/Makefile index 2d3d4843c4c0..6a9e3063de39 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,11 @@ ROOTDIR = $(CURDIR) cython cython2 cython3 web runtime vta ifndef DMLC_CORE_PATH - DMLC_CORE_PATH = $(ROOTDIR)/dmlc-core + DMLC_CORE_PATH = $(ROOTDIR)/3rdparty/dmlc-core endif ifndef DLPACK_PATH - DLPACK_PATH = $(ROOTDIR)/dlpack + DLPACK_PATH = $(ROOTDIR)/3rdparty/dlpack endif INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include @@ -50,10 +50,10 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc # Lint scripts cpplint: - python3 dmlc-core/scripts/lint.py vta cpp vta/include vta/src - python3 dmlc-core/scripts/lint.py topi cpp topi/include; - python3 dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src; - python3 dmlc-core/scripts/lint.py tvm cpp include src verilog\ + python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src + python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include; + python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src; + python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src verilog\ examples/extension/src examples/graph_executor/src pylint: @@ -63,7 +63,7 @@ pylint: python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc jnilint: - python3 dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src + python3 3rdparty/dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src lint: cpplint pylint jnilint diff --git a/apps/android_deploy/app/src/main/jni/Android.mk b/apps/android_deploy/app/src/main/jni/Android.mk index a99517f90332..da5f499ea706 100644 --- a/apps/android_deploy/app/src/main/jni/Android.mk 
+++ b/apps/android_deploy/app/src/main/jni/Android.mk @@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog LOCAL_C_INCLUDES := $(ROOT_PATH)/include \ - $(ROOT_PATH)/dlpack/include \ - $(ROOT_PATH)/dmlc-core/include \ - $(ROOT_PATH)/HalideIR/src \ + $(ROOT_PATH)/3rdparty/dlpack/include \ + $(ROOT_PATH)/3rdparty/dmlc-core/include \ + $(ROOT_PATH)/3rdparty/HalideIR/src \ $(ROOT_PATH)/topi/include LOCAL_MODULE = tvm4j_runtime_packed diff --git a/apps/android_rpc/app/src/main/jni/Android.mk b/apps/android_rpc/app/src/main/jni/Android.mk index a99517f90332..da5f499ea706 100644 --- a/apps/android_rpc/app/src/main/jni/Android.mk +++ b/apps/android_rpc/app/src/main/jni/Android.mk @@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog LOCAL_C_INCLUDES := $(ROOT_PATH)/include \ - $(ROOT_PATH)/dlpack/include \ - $(ROOT_PATH)/dmlc-core/include \ - $(ROOT_PATH)/HalideIR/src \ + $(ROOT_PATH)/3rdparty/dlpack/include \ + $(ROOT_PATH)/3rdparty/dmlc-core/include \ + $(ROOT_PATH)/3rdparty/HalideIR/src \ $(ROOT_PATH)/topi/include LOCAL_MODULE = tvm4j_runtime_packed diff --git a/apps/extension/Makefile b/apps/extension/Makefile index 29b9a1163f16..3a1f8a2160ee 100644 --- a/apps/extension/Makefile +++ b/apps/extension/Makefile @@ -2,9 +2,9 @@ TVM_ROOT=$(shell cd ../..; pwd) PKG_CFLAGS = -std=c++11 -O2 -fPIC\ -I${TVM_ROOT}/include\ - -I${TVM_ROOT}/dmlc-core/include\ - -I${TVM_ROOT}/dlpack/include\ - -I${TVM_ROOT}/HalideIR/src + -I${TVM_ROOT}/3rdparty/dmlc-core/include\ + -I${TVM_ROOT}/3rdparty/dlpack/include\ + -I${TVM_ROOT}/3rdparty/HalideIR/src PKG_LDFLAGS =-L${TVM_ROOT}/lib UNAME_S := $(shell uname -s) diff --git a/apps/howto_deploy/Makefile b/apps/howto_deploy/Makefile index ad4e56680d21..7accb7dd64ae 100644 --- a/apps/howto_deploy/Makefile +++ b/apps/howto_deploy/Makefile @@ -1,12 +1,12 @@ # Makefile Example to deploy TVM modules. 
TVM_ROOT=$(shell cd ../..; pwd) NNVM_PATH=nnvm -DMLC_CORE=${TVM_ROOT}/dmlc-core +DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core PKG_CFLAGS = -std=c++11 -O2 -fPIC\ -I${TVM_ROOT}/include\ -I${DMLC_CORE}/include\ - -I${TVM_ROOT}/dlpack/include\ + -I${TVM_ROOT}/3rdparty/dlpack/include\ PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -lpthread diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc index 27f95e9e6065..c4b6e2a2d44e 100644 --- a/apps/howto_deploy/tvm_runtime_pack.cc +++ b/apps/howto_deploy/tvm_runtime_pack.cc @@ -8,8 +8,8 @@ * - Compile with -std=c++11 * - Add the following include path * - /path/to/tvm/include/ - * - /path/to/tvm/dmlc-core/include/ - * - /path/to/tvm/dlpack/include/ + * - /path/to/tvm/3rdparty/dmlc-core/include/ + * - /path/to/tvm/3rdparty/dlpack/include/ * - Add -lpthread -ldl to the linked library. * - You are good to go. * - See the Makefile in the same folder for example. diff --git a/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj b/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj index d53ed6ba4cb9..60b6e99e7a92 100644 --- a/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj +++ b/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj @@ -386,8 +386,8 @@ GCC_SYMBOLS_PRIVATE_EXTERN = NO; HEADER_SEARCH_PATHS = ( ../../include, - ../../dlpack/include, - "../../dmlc-core/include", + ../../3rdparty/dlpack/include, + "../../3rdparty/dmlc-core/include", ); INFOPLIST_FILE = tvmrpc/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; @@ -406,8 +406,8 @@ GCC_SYMBOLS_PRIVATE_EXTERN = NO; HEADER_SEARCH_PATHS = ( ../../include, - ../../dlpack/include, - "../../dmlc-core/include", + ../../3rdparty/dlpack/include, + "../../3rdparty/dmlc-core/include", ); INFOPLIST_FILE = tvmrpc/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; @@ -422,9 +422,9 @@ BUNDLE_LOADER = "$(TEST_HOST)"; DEVELOPMENT_TEAM = 3FR42MXLK9; HEADER_SEARCH_PATHS = ( - ../../dlpack/include, + 
../../3rdparty/dlpack/include, ../../include, - "../../dmlc-core/include", + "../../3rdparty/dmlc-core/include", ); INFOPLIST_FILE = tvmrpcLauncher/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; @@ -440,9 +440,9 @@ BUNDLE_LOADER = "$(TEST_HOST)"; DEVELOPMENT_TEAM = 3FR42MXLK9; HEADER_SEARCH_PATHS = ( - ../../dlpack/include, + ../../3rdparty/dlpack/include, ../../include, - "../../dmlc-core/include", + "../../3rdparty/dmlc-core/include", ); INFOPLIST_FILE = tvmrpcLauncher/Info.plist; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; diff --git a/apps/rocm_rpc/Makefile b/apps/rocm_rpc/Makefile index b4e527980941..d4e3ec06ca99 100644 --- a/apps/rocm_rpc/Makefile +++ b/apps/rocm_rpc/Makefile @@ -3,12 +3,12 @@ ROCM_PATH=/opt/rocm TVM_ROOT=$(shell cd ../..; pwd) NNVM_PATH=nnvm -DMLC_CORE=${TVM_ROOT}/dmlc-core +DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core PKG_CFLAGS = -std=c++11 -O2 -fPIC\ -I${TVM_ROOT}/include\ -I${DMLC_CORE}/include\ - -I${TVM_ROOT}/dlpack/include\ + -I${TVM_ROOT}/3rdparty/dlpack/include\ -I${ROCM_PATH}/include PKG_LDFLAGS = -L${ROCM_PATH}/lib -L${TVM_ROOT}/lib -ldl -lpthread -lhip_hcc -lMIOpen diff --git a/apps/sgx/Makefile b/apps/sgx/Makefile index cd7034d4c41b..1038f57c3ba1 100644 --- a/apps/sgx/Makefile +++ b/apps/sgx/Makefile @@ -23,7 +23,7 @@ uservice_library_name := sgx_uae_service$(sgx_sim) pkg_cflags := -std=c++11 -O2 -fPIC\ -I${TVM_ROOT}/include\ -I${DMLC_CORE}/include\ - -I${TVM_ROOT}/dlpack/include\ + -I${TVM_ROOT}/3rdparty/dlpack/include\ -I.\ -DDMLC_LOG_STACK_TRACE=0\ -fmax-errors=4 diff --git a/nnvm/Makefile b/nnvm/Makefile index adbae329e144..4ebd9ac95b70 100644 --- a/nnvm/Makefile +++ b/nnvm/Makefile @@ -13,7 +13,7 @@ TVMPATH = .. 
export LDFLAGS = -pthread -lm export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC -CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/dlpack/include -I$(TVMPATH)/HalideIR/src -I$(TVMPATH)/topi/include +CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include ifdef DMLC_CORE_PATH CFLAGS += -I$(DMLC_CORE_PATH)/include diff --git a/nnvm/amalgamation/Makefile b/nnvm/amalgamation/Makefile index 1f286f055237..4305339e0075 100644 --- a/nnvm/amalgamation/Makefile +++ b/nnvm/amalgamation/Makefile @@ -4,7 +4,7 @@ export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC ifdef DMLC_CORE_PATH CFLAGS += -I$(DMLC_CORE_PATH)/include else - CFLAGS += -I$(CURDIR)/../dmlc-core/include + CFLAGS += -I$(CURDIR)/../3rdparty/dmlc-core/include endif .PHONY: all clean diff --git a/python/setup.py b/python/setup.py index cbf8c5591703..71d61a52e349 100644 --- a/python/setup.py +++ b/python/setup.py @@ -74,8 +74,8 @@ def config_cython(): "tvm._ffi.%s.%s" % (subdir, fn[:-4]), ["tvm/_ffi/_cython/%s" % fn], include_dirs=["../include/", - "../dmlc-core/include", - "../dlpack/include", + "../3rdparty/dmlc-core/include", + "../3rdparty/dlpack/include", ], library_dirs=library_dirs, libraries=libraries, diff --git a/src/runtime/builtin_fp16.cc b/src/runtime/builtin_fp16.cc new file mode 100644 index 000000000000..c259399e05e9 --- /dev/null +++ b/src/runtime/builtin_fp16.cc @@ -0,0 +1,21 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * \file builtin_fp16.cc + * \brief Functions for conversion between fp32 and fp16 +*/ + +#include <builtin_fp16.h> + +namespace tvm { +namespace runtime { + +extern "C" uint16_t __gnu_f2h_ieee(float a) { + return __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(a); +} + +extern "C" float __gnu_h2f_ieee(uint16_t a) { + return __extendXfYf2__<uint16_t, uint16_t, 10, float, uint32_t, 23>(a); +} + +} // namespace runtime +} // namespace tvm diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py index 9f33e2aabfd8..7be538199a58 100644 --- a/tests/python/unittest/test_runtime_ndarray.py +++ b/tests/python/unittest/test_runtime_ndarray.py @@ -35,5 +35,26 @@ def test_nd_create(): ctx.sync() +def test_fp16_conversion(): + n = 100 + + for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]: + A = tvm.placeholder((n,), dtype=src) + B = tvm.compute((n,), lambda i: A[i].astype(dst)) + + s = tvm.create_schedule([B.op]) + func = tvm.build(s, [A, B], 'llvm') + + x_tvm = tvm.nd.array(100 * np.random.randn(n).astype(src) - 50) + y_tvm = tvm.nd.array(100 * np.random.randn(n).astype(dst) - 50) + + func(x_tvm, y_tvm) + + expected = x_tvm.asnumpy().astype(dst) + real = y_tvm.asnumpy() + + np.testing.assert_allclose(expected, real) + if __name__ == "__main__": test_nd_create() + test_fp16_conversion() diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index c3fe09effb76..30b4808f5e2d 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -42,8 +42,8 @@ def __init__(self, cfg, proj_root): self.include_path = [ "-I%s/include" % proj_root, "-I%s/vta/include" % proj_root, - "-I%s/dlpack/include" % proj_root, - "-I%s/dmlc-core/include" % proj_root + "-I%s/3rdparty/dlpack/include" % proj_root, + "-I%s/3rdparty/dmlc-core/include" % proj_root ] # List of source files that can be used to build standalone library. self.lib_source = []