diff --git a/Jenkinsfile b/Jenkinsfile index b667359f0f2b..db45b6c065e9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -87,7 +87,6 @@ stage('Build') { cp make/config.mk . echo USE_CUDNN=1 >> config.mk echo USE_CUDA=1 >> config.mk - echo USE_OPENCL=1 >> config.mk echo USE_OPENGL=1 >> config.mk echo LLVM_CONFIG=llvm-config-4.0 >> config.mk echo USE_RPC=1 >> config.mk @@ -105,6 +104,7 @@ stage('Build') { sh "mv lib/libtvm.so lib/libtvm_llvm60.so" pack_lib('gpu', tvm_multilib) sh """ + echo USE_OPENCL=1 >> config.mk echo USE_ROCM=1 >> config.mk echo ROCM_PATH=/opt/rocm >> config.mk echo USE_VULKAN=1 >> config.mk diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc index ae650cbd7b06..6341c9f4b83d 100644 --- a/src/runtime/opencl/opencl_device_api.cc +++ b/src/runtime/opencl/opencl_device_api.cc @@ -145,11 +145,6 @@ std::string GetDeviceInfo( } std::vector GetPlatformIDs() { - // Trigger CUDA initialziation when it exists - // on some NV platform opencl depends on CUDA to be loaded first - if (const PackedFunc* query = runtime::Registry::Get("_GetDeviceAttr")) { - (*query)(static_cast(kDLGPU), 0, static_cast(kExist)); - } cl_uint ret_size; cl_int code = clGetPlatformIDs(0, nullptr, &ret_size); std::vector ret; diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index e49e498b8d40..4b461ebf19c6 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -1,7 +1,6 @@ FROM nvidia/cuda:8.0-cudnn7-devel # Base scripts -RUN apt-get update --fix-missing COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh @@ -12,9 +11,6 @@ RUN bash /install/ubuntu_install_python.sh COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh RUN bash /install/ubuntu_install_llvm.sh -COPY install/ubuntu_install_opencl.sh /install/ubuntu_install_opencl.sh -RUN bash /install/ubuntu_install_opencl.sh - COPY install/ubuntu_install_iverilog.sh 
/install/ubuntu_install_iverilog.sh RUN bash /install/ubuntu_install_iverilog.sh @@ -40,8 +36,11 @@ RUN bash /install/ubuntu_install_rocm.sh COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh RUN bash /install/ubuntu_install_opengl.sh +COPY install/ubuntu_install_opencl.sh /install/ubuntu_install_opencl.sh +RUN bash /install/ubuntu_install_opencl.sh + # Enable doxygen for c++ doc build -RUN apt-get install -y doxygen graphviz +RUN apt-get update && apt-get install -y doxygen graphviz # Install vulkan COPY install/ubuntu_install_vulkan.sh /install/ubuntu_install_vulkan.sh diff --git a/tests/ci_build/install/ubuntu_install_core.sh b/tests/ci_build/install/ubuntu_install_core.sh index 9823ae0788ac..efc69c946b97 100644 --- a/tests/ci_build/install/ubuntu_install_core.sh +++ b/tests/ci_build/install/ubuntu_install_core.sh @@ -1,5 +1,5 @@ # install libraries for building c++ core on ubuntu -apt-get install -y --no-install-recommends --force-yes \ +apt-get update && apt-get install -y --no-install-recommends --force-yes \ git make libgtest-dev cmake wget unzip libtinfo-dev libz-dev\ libcurl4-openssl-dev libopenblas-dev g++ sudo diff --git a/tests/ci_build/install/ubuntu_install_opencl.sh b/tests/ci_build/install/ubuntu_install_opencl.sh index 636236539a98..ca4d1d04fd5c 100644 --- a/tests/ci_build/install/ubuntu_install_opencl.sh +++ b/tests/ci_build/install/ubuntu_install_opencl.sh @@ -1,8 +1,8 @@ # Install OpenCL runtime in nvidia docker. 
-apt-get install -y --no-install-recommends --force-yes \ - ocl-icd-libopencl1 \ +apt-get update && apt-get install -y --no-install-recommends --force-yes \ + ocl-icd-opencl-dev \ clinfo && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* mkdir -p /etc/OpenCL/vendors && \ echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd diff --git a/tutorials/deployment/cross_compilation_and_rpc.py b/tutorials/deployment/cross_compilation_and_rpc.py index 57f0816e162d..f06bbfca6407 100644 --- a/tutorials/deployment/cross_compilation_and_rpc.py +++ b/tutorials/deployment/cross_compilation_and_rpc.py @@ -239,47 +239,52 @@ # But here we set 'llvm' to enable this tutorial to run locally. # # Also we need to build the runtime with the flag `USE_OPENCL=1`. - # build kernel (different from cpu, we need bind axis for OpenCL) -s = tvm.create_schedule(B.op) -xo, xi = s[B].split(B.op.axis[0], factor=32) -s[B].bind(xo, tvm.thread_axis("blockIdx.x")) -s[B].bind(xi, tvm.thread_axis("threadIdx.x")) -f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd") +# +# The following function shows how we can deploy CL +def deploy_cl(): + s = tvm.create_schedule(B.op) + xo, xi = s[B].split(B.op.axis[0], factor=32) + s[B].bind(xo, tvm.thread_axis("blockIdx.x")) + s[B].bind(xi, tvm.thread_axis("threadIdx.x")) + f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd") -# save files -path_o = temp.relpath("myadd.o") -path_cl = temp.relpath("myadd.cl") -path_json = temp.relpath("myadd.tvm_meta.json") -f.save(path_o) -f.imported_modules[0].save(path_cl) + # save files + path_o = temp.relpath("myadd.o") + path_cl = temp.relpath("myadd.cl") + path_json = temp.relpath("myadd.tvm_meta.json") + f.save(path_o) + f.imported_modules[0].save(path_cl) -# upload files -remote.upload(path_o) -remote.upload(path_cl) -remote.upload(path_json) + # upload files + remote.upload(path_o) + remote.upload(path_cl) + remote.upload(path_json) -# load files on remote device -fhost =
remote.load_module("myadd.o") -fdev = remote.load_module("myadd.cl") -fhost.import_module(fdev) + # load files on remote device + fhost = remote.load_module("myadd.o") + fdev = remote.load_module("myadd.cl") + fhost.import_module(fdev) + + # run + ctx = remote.cl(0) + a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx) + b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) + fhost(a, b) + np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) -# run -ctx = remote.cl(0) -a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx) -b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) -fhost(a, b) -np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) ##################################################################### # Instead of uploading files separately, there is a more convinient way. # You can export libraray as a tar ball. -path_tar = temp.relpath("myadd.tar") -f.export_library(path_tar) -remote.upload(path_tar) -fhost = remote.load_module("myadd.tar") -fhost(a, b) -np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) +# The following function shows how we can deploy by tar ball +def deploy_cl_by_tar(): + path_tar = temp.relpath("myadd.tar") + f.export_library(path_tar) + remote.upload(path_tar) + fhost = remote.load_module("myadd.tar") + fhost(a, b) + np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) # terminate the server after experiment server.terminate()