diff --git a/.gitmodules b/.gitmodules
index 3f0b222a86c6..8011ec12d24b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,9 @@
 [submodule "dmlc-core"]
-	path = dmlc-core
+	path = 3rdparty/dmlc-core
 	url = https://github.com/dmlc/dmlc-core
 [submodule "HalideIR"]
-	path = HalideIR
+	path = 3rdparty/HalideIR
 	url = https://github.com/dmlc/HalideIR
 [submodule "dlpack"]
-	path = dlpack
+	path = 3rdparty/dlpack
 	url = https://github.com/dmlc/dlpack
diff --git a/HalideIR b/3rdparty/HalideIR
similarity index 100%
rename from HalideIR
rename to 3rdparty/HalideIR
diff --git a/3rdparty/compiler-rt/builtin_fp16.h b/3rdparty/compiler-rt/builtin_fp16.h
new file mode 100644
index 000000000000..1657d2830119
--- /dev/null
+++ b/3rdparty/compiler-rt/builtin_fp16.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2009-2015 by llvm/compiler-rt contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+
+ * Copyright (c) 2018 by Contributors
+ * \file builtin_fp16.h
+ * \brief Functions for conversion between fp32 and fp16, adapted from compiler-rt.
+ */
+#pragma once  // the static inline/template definitions below must be seen once per TU
+#include <cstdint>
+
+// Count the leading zero bits of a 32-bit value; yields 32 when x == 0.
+static inline uint32_t __clz(uint32_t x) {
+  uint32_t zeros = 32;
+  // Binary search: whenever the upper part of the remaining window is
+  // non-empty, keep only that part and drop its width from the count.
+  for (int width = 16; width > 1; width >>= 1) {
+    const uint32_t upper = x >> width;
+    if (upper != 0) { zeros -= width; x = upper; }
+  }
+  // x is now in [0, 3]: 2 or 3 leave zeros - 2, 1 leaves zeros - 1, 0 leaves zeros.
+  return (x >> 1) ? zeros - 2 : zeros - x;
+}
+
+/*!
+ * \brief Narrow a floating point value, rounding to nearest with ties to even.
+ *  Magnitudes too large give infinity, too small a denormal or zero, NaN a quiet NaN.
+ * \tparam SRC_T, DST_T Source and destination value types.
+ * \tparam SRC_REP_T, DST_REP_T Unsigned integer types used to access their bits.
+ * \tparam SRC_SIG_BITS, DST_SIG_BITS Number of stored significand bits per format.
+ * \param a The value to narrow.
+ * \return a converted to the destination format, as a DST_T.
+ */
+template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
+          typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
+static inline DST_T __truncXfYf2__(SRC_T a) {
+  // Constants that follow from the type parameters; the optimizer folds them away.
+  const int srcBits = sizeof(SRC_T) * 8;
+  const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
+  const int srcInfExp = (1 << srcExpBits) - 1;
+  const int srcExpBias = srcInfExp >> 1;
+
+  const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
+  const SRC_REP_T srcSignificandMask = srcMinNormal - 1;
+  const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
+  const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
+  const SRC_REP_T srcAbsMask = srcSignMask - 1;
+  const SRC_REP_T roundMask = (SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS)) - 1;
+  const SRC_REP_T halfway = SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS - 1);
+  const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
+  const SRC_REP_T srcNaNCode = srcQNaN - 1;
+
+  const int dstBits = sizeof(DST_T) * 8;
+  const int dstExpBits = dstBits - DST_SIG_BITS - 1;
+  const int dstInfExp = (1 << dstExpBits) - 1;
+  const int dstExpBias = dstInfExp >> 1;
+
+  const int underflowExponent = srcExpBias + 1 - dstExpBias;
+  const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
+  const SRC_REP_T underflow = (SRC_REP_T)underflowExponent << SRC_SIG_BITS;
+  const SRC_REP_T overflow = (SRC_REP_T)overflowExponent << SRC_SIG_BITS;
+
+  const DST_REP_T dstQNaN = DST_REP_T(1) << (DST_SIG_BITS - 1);
+  const DST_REP_T dstNaNCode = dstQNaN - 1;
+  // Break a into a sign and representation of the absolute value.
+  const union { SRC_T f; SRC_REP_T i; } src_rep = {a};  // initializes f, the first member
+  const SRC_REP_T aRep = src_rep.i;
+  const SRC_REP_T aAbs = aRep & srcAbsMask;
+  const SRC_REP_T sign = aRep & srcSignMask;
+  DST_REP_T absResult;
+
+  if (aAbs - underflow < aAbs - overflow) {
+    // The exponent of a is within the range of normal numbers in the
+    // destination format.  We can convert by simply right-shifting with
+    // rounding and adjusting the exponent.
+    absResult = aAbs >> (SRC_SIG_BITS - DST_SIG_BITS);
+    absResult -= (DST_REP_T)(srcExpBias - dstExpBias) << DST_SIG_BITS;
+    const SRC_REP_T roundBits = aAbs & roundMask;
+    // Round to nearest
+    if (roundBits > halfway)
+      absResult++;
+    // Ties to even
+    else if (roundBits == halfway)
+      absResult += absResult & 1;
+  } else if (aAbs > srcInfinity) {
+    // a is NaN.
+    // Conjure the result by beginning with infinity, setting the qNaN
+    // bit and inserting the (truncated) trailing NaN field.
+    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
+    absResult |= dstQNaN;
+    absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
+  } else if (aAbs >= overflow) {
+    // a overflows to infinity.
+    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
+  } else {
+    // a underflows on conversion to the destination type or is an exact
+    // zero.  The result may be a denormal or zero.  Extract the exponent
+    // to get the shift amount for the denormalization.
+    const int aExp = aAbs >> SRC_SIG_BITS;
+    const int shift = srcExpBias - dstExpBias - aExp + 1;
+    const SRC_REP_T significand = (aRep & srcSignificandMask) | srcMinNormal;
+    // Right shift by the denormalization amount with sticky.
+    if (shift > SRC_SIG_BITS) {
+      absResult = 0;
+    } else {
+      const bool sticky = significand << (srcBits - shift);
+      SRC_REP_T denormalizedSignificand = significand >> shift | sticky;
+      absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS);
+      const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
+      // Round to nearest
+      if (roundBits > halfway)
+        absResult++;
+      // Ties to even
+      else if (roundBits == halfway)
+        absResult += absResult & 1;
+    }
+  }
+  // Apply the signbit to (DST_T)abs(a).
+  const DST_REP_T result = absResult | sign >> (srcBits - dstBits);
+  const union { DST_REP_T i; DST_T f; } dst_rep = {result};  // integer member listed first
+  return dst_rep.f;
+}
+
+/*!
+ * \brief Widen a floating point value to a format with more exponent and significand bits.
+ *  Denormal inputs are renormalized; infinity and the NaN payload bits are carried over.
+ * \tparam SRC_T, DST_T Source and destination value types.
+ * \tparam SRC_REP_T, DST_REP_T Unsigned integer types used to access their bits.
+ * \tparam SRC_SIG_BITS, DST_SIG_BITS Number of stored significand bits per format.
+ * \param a The value to widen.
+ * \return a converted to the destination format, as a DST_T.
+ */
+template<typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
+         typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
+static inline DST_T __extendXfYf2__(SRC_T a) {
+  // Constants that follow from the type parameters; the optimizer folds them away.
+  const int srcBits = sizeof(SRC_T) * 8;
+  const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
+  const int srcInfExp = (1 << srcExpBits) - 1;
+  const int srcExpBias = srcInfExp >> 1;
+
+  const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
+  const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
+  const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
+  const SRC_REP_T srcAbsMask = srcSignMask - 1;
+  const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
+  const SRC_REP_T srcNaNCode = srcQNaN - 1;
+
+  const int dstBits = sizeof(DST_T) * 8;
+  const int dstExpBits = dstBits - DST_SIG_BITS - 1;
+  const int dstInfExp = (1 << dstExpBits) - 1;
+  const int dstExpBias = dstInfExp >> 1;
+  const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS;
+
+  // Break a into a sign and representation of the absolute value.
+  const union { SRC_T f; SRC_REP_T i; } src_rep = {a};  // initializes f, the first member
+  const SRC_REP_T aRep = src_rep.i;
+  const SRC_REP_T aAbs = aRep & srcAbsMask;
+  const SRC_REP_T sign = aRep & srcSignMask;
+  DST_REP_T absResult;
+
+  // The cast keeps the difference unsigned when SRC_REP_T is narrower than int.
+  if ((SRC_REP_T)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
+    // a is a normal number.
+    // Extend to the destination type by shifting the significand and
+    // exponent into the proper position and rebiasing the exponent.
+    absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS);
+    absResult += (DST_REP_T)(dstExpBias - srcExpBias) << DST_SIG_BITS;
+  } else if (aAbs >= srcInfinity) {
+    // a is NaN or infinity.
+    // Conjure the result by beginning with infinity, then setting the qNaN bit
+    // (if needed) and right-aligning the rest of the trailing NaN payload field.
+    absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
+    absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS);
+    absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS);
+  } else if (aAbs) {
+    // a is denormal.
+    // renormalize the significand and clear the leading bit, then insert
+    // the correct adjusted exponent in the destination type.
+    const int scale = __clz(aAbs) - __clz(srcMinNormal);
+    absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS + scale);
+    absResult ^= dstMinNormal;
+    const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+    absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS;
+  } else {
+    // a is zero.
+    absResult = 0;
+  }
+  // Apply the signbit to (DST_T)abs(a).
+  const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits);
+  const union { DST_REP_T i; DST_T f; } dst_rep = {result};  // integer member listed first
+  return dst_rep.f;
+}
diff --git a/dlpack b/3rdparty/dlpack
similarity index 100%
rename from dlpack
rename to 3rdparty/dlpack
diff --git a/dmlc-core b/3rdparty/dmlc-core
similarity index 100%
rename from dmlc-core
rename to 3rdparty/dmlc-core
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1f03b9f64ab9..5e1d8bcc38fb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,8 +50,9 @@ tvm_option(USE_RANDOM "Build with random support" OFF)
 
 # include directories
 include_directories("include")
-include_directories("dlpack/include")
-include_directories("dmlc-core/include")
+include_directories("3rdparty/dlpack/include")
+include_directories("3rdparty/dmlc-core/include")
+include_directories("3rdparty/compiler-rt")
 
 # initial variables
 set(TVM_LINKER_LIBS "")
@@ -87,8 +88,8 @@ else(MSVC)
 endif(MSVC)
 
 # add source group
-FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "HalideIR/src/*.cpp" "nnvm/src/*.cc")
-FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "HalideIR/src/*.h"
+FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "3rdparty/HalideIR/src/*.cpp" "nnvm/src/*.cc")
+FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "3rdparty/HalideIR/src/*.h"
                                 "nnvm/src/*.h" "nnvm/include/*.h")
 assign_source_group("Source" ${GROUP_SOURCE})
 assign_source_group("Include" ${GROUP_INCLUDE})
@@ -127,7 +128,7 @@ file(GLOB_RECURSE NNVM_COMPILER_SRCS
 file(GLOB TOPI_SRCS
     topi/src/*.cc
 )
-file(GLOB_RECURSE HALIDEIR_SRCS HalideIR/src/*.cpp)
+file(GLOB_RECURSE HALIDEIR_SRCS 3rdparty/HalideIR/src/*.cpp)
 list(APPEND COMPILER_SRCS ${HALIDEIR_SRCS})
 file(GLOB RUNTIME_SRCS src/runtime/*.cc)
 
@@ -194,7 +195,7 @@ target_link_libraries(nnvm_compiler tvm)
 # Related headers
 target_include_directories(
   tvm
-  PUBLIC "HalideIR/src"
+  PUBLIC "3rdparty/HalideIR/src"
   PUBLIC "topi/include")
 target_include_directories(
   tvm_topi
@@ -244,12 +245,12 @@ if (INSTALL_DEV)
     PATTERN "*.h"
   )
   install(
-    DIRECTORY "HalideIR/src/." DESTINATION "include/HalideIR"
+    DIRECTORY "3rdparty/HalideIR/src/." DESTINATION "include/HalideIR"
     FILES_MATCHING
     PATTERN "*.h"
   )
   install(
-    DIRECTORY "dlpack/include/." DESTINATION "include"
+    DIRECTORY "3rdparty/dlpack/include/." DESTINATION "include"
     FILES_MATCHING
     PATTERN "*.h"
     )
diff --git a/Makefile b/Makefile
index 2d3d4843c4c0..6a9e3063de39 100644
--- a/Makefile
+++ b/Makefile
@@ -4,11 +4,11 @@ ROOTDIR = $(CURDIR)
 	 cython cython2 cython3 web runtime vta
 
 ifndef DMLC_CORE_PATH
-  DMLC_CORE_PATH = $(ROOTDIR)/dmlc-core
+  DMLC_CORE_PATH = $(ROOTDIR)/3rdparty/dmlc-core
 endif
 
 ifndef DLPACK_PATH
-  DLPACK_PATH = $(ROOTDIR)/dlpack
+  DLPACK_PATH = $(ROOTDIR)/3rdparty/dlpack
 endif
 
 INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include
@@ -50,10 +50,10 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc
 
 # Lint scripts
 cpplint:
-	python3 dmlc-core/scripts/lint.py vta cpp vta/include vta/src
-	python3 dmlc-core/scripts/lint.py topi cpp topi/include;
-	python3 dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src;
-	python3 dmlc-core/scripts/lint.py tvm cpp include src verilog\
+	python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
+	python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include;
+	python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src;
+	python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src verilog\
 	 examples/extension/src examples/graph_executor/src
 
 pylint:
@@ -63,7 +63,7 @@ pylint:
 	python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc
 
 jnilint:
-	python3 dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
+	python3 3rdparty/dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
 
 lint: cpplint pylint jnilint
 
diff --git a/apps/android_deploy/app/src/main/jni/Android.mk b/apps/android_deploy/app/src/main/jni/Android.mk
index a99517f90332..da5f499ea706 100644
--- a/apps/android_deploy/app/src/main/jni/Android.mk
+++ b/apps/android_deploy/app/src/main/jni/Android.mk
@@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc
 LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog
 
 LOCAL_C_INCLUDES := $(ROOT_PATH)/include \
-                    $(ROOT_PATH)/dlpack/include \
-                    $(ROOT_PATH)/dmlc-core/include \
-                    $(ROOT_PATH)/HalideIR/src \
+                    $(ROOT_PATH)/3rdparty/dlpack/include \
+                    $(ROOT_PATH)/3rdparty/dmlc-core/include \
+                    $(ROOT_PATH)/3rdparty/HalideIR/src \
                     $(ROOT_PATH)/topi/include
 
 LOCAL_MODULE = tvm4j_runtime_packed
diff --git a/apps/android_rpc/app/src/main/jni/Android.mk b/apps/android_rpc/app/src/main/jni/Android.mk
index a99517f90332..da5f499ea706 100644
--- a/apps/android_rpc/app/src/main/jni/Android.mk
+++ b/apps/android_rpc/app/src/main/jni/Android.mk
@@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc
 LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog
 
 LOCAL_C_INCLUDES := $(ROOT_PATH)/include \
-                    $(ROOT_PATH)/dlpack/include \
-                    $(ROOT_PATH)/dmlc-core/include \
-                    $(ROOT_PATH)/HalideIR/src \
+                    $(ROOT_PATH)/3rdparty/dlpack/include \
+                    $(ROOT_PATH)/3rdparty/dmlc-core/include \
+                    $(ROOT_PATH)/3rdparty/HalideIR/src \
                     $(ROOT_PATH)/topi/include
 
 LOCAL_MODULE = tvm4j_runtime_packed
diff --git a/apps/extension/Makefile b/apps/extension/Makefile
index 29b9a1163f16..3a1f8a2160ee 100644
--- a/apps/extension/Makefile
+++ b/apps/extension/Makefile
@@ -2,9 +2,9 @@
 TVM_ROOT=$(shell cd ../..; pwd)
 PKG_CFLAGS = -std=c++11 -O2 -fPIC\
 	-I${TVM_ROOT}/include\
-	-I${TVM_ROOT}/dmlc-core/include\
-	-I${TVM_ROOT}/dlpack/include\
-	-I${TVM_ROOT}/HalideIR/src
+	-I${TVM_ROOT}/3rdparty/dmlc-core/include\
+	-I${TVM_ROOT}/3rdparty/dlpack/include\
+	-I${TVM_ROOT}/3rdparty/HalideIR/src
 
 PKG_LDFLAGS =-L${TVM_ROOT}/lib
 UNAME_S := $(shell uname -s)
diff --git a/apps/howto_deploy/Makefile b/apps/howto_deploy/Makefile
index ad4e56680d21..7accb7dd64ae 100644
--- a/apps/howto_deploy/Makefile
+++ b/apps/howto_deploy/Makefile
@@ -1,12 +1,12 @@
 # Makefile Example to deploy TVM modules.
 TVM_ROOT=$(shell cd ../..; pwd)
 NNVM_PATH=nnvm
-DMLC_CORE=${TVM_ROOT}/dmlc-core
+DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
 
 PKG_CFLAGS = -std=c++11 -O2 -fPIC\
 	-I${TVM_ROOT}/include\
 	-I${DMLC_CORE}/include\
-	-I${TVM_ROOT}/dlpack/include\
+	-I${TVM_ROOT}/3rdparty/dlpack/include\
 
 PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -lpthread
 
diff --git a/apps/howto_deploy/tvm_runtime_pack.cc b/apps/howto_deploy/tvm_runtime_pack.cc
index 27f95e9e6065..c4b6e2a2d44e 100644
--- a/apps/howto_deploy/tvm_runtime_pack.cc
+++ b/apps/howto_deploy/tvm_runtime_pack.cc
@@ -8,8 +8,8 @@
  *  - Compile with -std=c++11
  *  - Add the following include path
  *     - /path/to/tvm/include/
- *     - /path/to/tvm/dmlc-core/include/
- *     - /path/to/tvm/dlpack/include/
+ *     - /path/to/tvm/3rdparty/dmlc-core/include/
+ *     - /path/to/tvm/3rdparty/dlpack/include/
  *   - Add -lpthread -ldl to the linked library.
  *   - You are good to go.
  *   - See the Makefile in the same folder for example.
diff --git a/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj b/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj
index d53ed6ba4cb9..60b6e99e7a92 100644
--- a/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj
+++ b/apps/ios_rpc/tvmrpc.xcodeproj/project.pbxproj
@@ -386,8 +386,8 @@
 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
 				HEADER_SEARCH_PATHS = (
 					../../include,
-					../../dlpack/include,
-					"../../dmlc-core/include",
+					../../3rdparty/dlpack/include,
+					"../../3rdparty/dmlc-core/include",
 				);
 				INFOPLIST_FILE = tvmrpc/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
@@ -406,8 +406,8 @@
 				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
 				HEADER_SEARCH_PATHS = (
 					../../include,
-					../../dlpack/include,
-					"../../dmlc-core/include",
+					../../3rdparty/dlpack/include,
+					"../../3rdparty/dmlc-core/include",
 				);
 				INFOPLIST_FILE = tvmrpc/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
@@ -422,9 +422,9 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				DEVELOPMENT_TEAM = 3FR42MXLK9;
 				HEADER_SEARCH_PATHS = (
-					../../dlpack/include,
+					../../3rdparty/dlpack/include,
 					../../include,
-					"../../dmlc-core/include",
+					"../../3rdparty/dmlc-core/include",
 				);
 				INFOPLIST_FILE = tvmrpcLauncher/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
@@ -440,9 +440,9 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				DEVELOPMENT_TEAM = 3FR42MXLK9;
 				HEADER_SEARCH_PATHS = (
-					../../dlpack/include,
+					../../3rdparty/dlpack/include,
 					../../include,
-					"../../dmlc-core/include",
+					"../../3rdparty/dmlc-core/include",
 				);
 				INFOPLIST_FILE = tvmrpcLauncher/Info.plist;
 				LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
diff --git a/apps/rocm_rpc/Makefile b/apps/rocm_rpc/Makefile
index b4e527980941..d4e3ec06ca99 100644
--- a/apps/rocm_rpc/Makefile
+++ b/apps/rocm_rpc/Makefile
@@ -3,12 +3,12 @@ ROCM_PATH=/opt/rocm
 
 TVM_ROOT=$(shell cd ../..; pwd)
 NNVM_PATH=nnvm
-DMLC_CORE=${TVM_ROOT}/dmlc-core
+DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
 
 PKG_CFLAGS = -std=c++11 -O2 -fPIC\
 	-I${TVM_ROOT}/include\
 	-I${DMLC_CORE}/include\
-	-I${TVM_ROOT}/dlpack/include\
+	-I${TVM_ROOT}/3rdparty/dlpack/include\
 	-I${ROCM_PATH}/include
 
 PKG_LDFLAGS = -L${ROCM_PATH}/lib -L${TVM_ROOT}/lib -ldl -lpthread -lhip_hcc -lMIOpen
diff --git a/apps/sgx/Makefile b/apps/sgx/Makefile
index cd7034d4c41b..1038f57c3ba1 100644
--- a/apps/sgx/Makefile
+++ b/apps/sgx/Makefile
@@ -23,7 +23,7 @@ uservice_library_name := sgx_uae_service$(sgx_sim)
 pkg_cflags := -std=c++11 -O2 -fPIC\
 	-I${TVM_ROOT}/include\
 	-I${DMLC_CORE}/include\
-	-I${TVM_ROOT}/dlpack/include\
+	-I${TVM_ROOT}/3rdparty/dlpack/include\
 	-I.\
 	-DDMLC_LOG_STACK_TRACE=0\
 	-fmax-errors=4
diff --git a/nnvm/Makefile b/nnvm/Makefile
index adbae329e144..4ebd9ac95b70 100644
--- a/nnvm/Makefile
+++ b/nnvm/Makefile
@@ -13,7 +13,7 @@ TVMPATH = ..
 
 export LDFLAGS = -pthread -lm
 export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC
-CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/dlpack/include -I$(TVMPATH)/HalideIR/src -I$(TVMPATH)/topi/include
+CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include
 
 ifdef DMLC_CORE_PATH
   CFLAGS += -I$(DMLC_CORE_PATH)/include
diff --git a/nnvm/amalgamation/Makefile b/nnvm/amalgamation/Makefile
index 1f286f055237..4305339e0075 100644
--- a/nnvm/amalgamation/Makefile
+++ b/nnvm/amalgamation/Makefile
@@ -4,7 +4,7 @@ export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC
 ifdef DMLC_CORE_PATH
   CFLAGS += -I$(DMLC_CORE_PATH)/include
 else
-  CFLAGS += -I$(CURDIR)/../dmlc-core/include
+  CFLAGS += -I$(CURDIR)/../../3rdparty/dmlc-core/include
 endif
 
 .PHONY: all clean
diff --git a/python/setup.py b/python/setup.py
index cbf8c5591703..71d61a52e349 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -74,8 +74,8 @@ def config_cython():
                 "tvm._ffi.%s.%s" % (subdir, fn[:-4]),
                 ["tvm/_ffi/_cython/%s" % fn],
                 include_dirs=["../include/",
-                              "../dmlc-core/include",
-                              "../dlpack/include",
+                              "../3rdparty/dmlc-core/include",
+                              "../3rdparty/dlpack/include",
                 ],
                 library_dirs=library_dirs,
                 libraries=libraries,
diff --git a/src/runtime/builtin_fp16.cc b/src/runtime/builtin_fp16.cc
new file mode 100644
index 000000000000..c259399e05e9
--- /dev/null
+++ b/src/runtime/builtin_fp16.cc
@@ -0,0 +1,21 @@
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file builtin_fp16.cc
+ * \brief Functions for conversion between fp32 and fp16
+*/
+
+#include <builtin_fp16.h>
+
+namespace tvm {
+namespace runtime {
+// Narrows an fp32 value to fp16 (10 stored significand bits); returns the 16-bit pattern.
+extern "C"  uint16_t __gnu_f2h_ieee(float a) {
+  return __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(a);
+}
+// Widens an fp16 bit pattern to an fp32 value (23 stored significand bits).
+extern "C" float __gnu_h2f_ieee(uint16_t a) {
+  return __extendXfYf2__<uint16_t, uint16_t, 10, float, uint32_t, 23>(a);
+}
+
+}  // namespace runtime
+}  // namespace tvm
diff --git a/tests/python/unittest/test_runtime_ndarray.py b/tests/python/unittest/test_runtime_ndarray.py
index 9f33e2aabfd8..7be538199a58 100644
--- a/tests/python/unittest/test_runtime_ndarray.py
+++ b/tests/python/unittest/test_runtime_ndarray.py
@@ -35,5 +35,26 @@ def test_nd_create():
         ctx.sync()
 
 
+def test_fp16_conversion():
+    """Compare LLVM-built float32 <-> float16 casts against numpy's astype."""
+    size = 100
+    for src, dst in (('float32', 'float16'), ('float16', 'float32')):
+        # Single-stage kernel casting each element of A from src to dst.
+        A = tvm.placeholder((size,), dtype=src)
+        B = tvm.compute((size,), lambda i: A[i].astype(dst))
+        func = tvm.build(tvm.create_schedule([B.op]), [A, B], 'llvm')
+
+        # x_tvm is the random input; y_tvm is passed as the output argument.
+        x_tvm = tvm.nd.array(100 * np.random.randn(size).astype(src) - 50)
+        y_tvm = tvm.nd.array(100 * np.random.randn(size).astype(dst) - 50)
+        func(x_tvm, y_tvm)
+
+        # numpy's cast of the same input serves as the reference.
+        expected = x_tvm.asnumpy().astype(dst)
+        real = y_tvm.asnumpy()
+        np.testing.assert_allclose(expected, real)
+        np.testing.assert_allclose(expected, real)
+
 if __name__ == "__main__":
     test_nd_create()
+    test_fp16_conversion()
diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py
index c3fe09effb76..30b4808f5e2d 100644
--- a/vta/python/vta/pkg_config.py
+++ b/vta/python/vta/pkg_config.py
@@ -42,8 +42,8 @@ def __init__(self, cfg, proj_root):
         self.include_path = [
             "-I%s/include" % proj_root,
             "-I%s/vta/include" % proj_root,
-            "-I%s/dlpack/include" % proj_root,
-            "-I%s/dmlc-core/include" % proj_root
+            "-I%s/3rdparty/dlpack/include" % proj_root,
+            "-I%s/3rdparty/dmlc-core/include" % proj_root
         ]
         # List of source files that can be used to build standalone library.
         self.lib_source = []