diff --git a/Jenkinsfile b/Jenkinsfile
index b667359f0f2b..db45b6c065e9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -87,7 +87,6 @@ stage('Build') {
            cp make/config.mk .
            echo USE_CUDNN=1 >> config.mk
            echo USE_CUDA=1 >> config.mk
-           echo USE_OPENCL=1 >> config.mk
            echo USE_OPENGL=1 >> config.mk
            echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
            echo USE_RPC=1 >> config.mk
@@ -105,6 +104,7 @@ stage('Build') {
         sh "mv lib/libtvm.so lib/libtvm_llvm60.so"
         pack_lib('gpu', tvm_multilib)
         sh """
+           echo USE_OPENCL=1 >> config.mk
            echo USE_ROCM=1 >> config.mk
            echo ROCM_PATH=/opt/rocm >> config.mk
            echo USE_VULKAN=1 >> config.mk
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index ae650cbd7b06..6341c9f4b83d 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -145,11 +145,6 @@ std::string GetDeviceInfo(
 }
 
 std::vector<cl_platform_id> GetPlatformIDs() {
-  // Trigger CUDA initialziation when it exists
-  // on some NV platform opencl depends on CUDA to be loaded first
-  if (const PackedFunc* query = runtime::Registry::Get("_GetDeviceAttr")) {
-    (*query)(static_cast<int>(kDLGPU), 0, static_cast<int>(kExist));
-  }
   cl_uint ret_size;
   cl_int code = clGetPlatformIDs(0, nullptr, &ret_size);
   std::vector<cl_platform_id> ret;
diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu
index e49e498b8d40..4b461ebf19c6 100644
--- a/tests/ci_build/Dockerfile.gpu
+++ b/tests/ci_build/Dockerfile.gpu
@@ -1,7 +1,6 @@
 FROM nvidia/cuda:8.0-cudnn7-devel
 
 # Base scripts
-RUN apt-get update --fix-missing
 
 COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
 RUN bash /install/ubuntu_install_core.sh
@@ -12,9 +11,6 @@ RUN bash /install/ubuntu_install_python.sh
 COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
 RUN bash /install/ubuntu_install_llvm.sh
 
-COPY install/ubuntu_install_opencl.sh /install/ubuntu_install_opencl.sh
-RUN bash /install/ubuntu_install_opencl.sh
-
 COPY install/ubuntu_install_iverilog.sh /install/ubuntu_install_iverilog.sh
 RUN bash /install/ubuntu_install_iverilog.sh
 
@@ -40,8 +36,11 @@ RUN bash /install/ubuntu_install_rocm.sh
 COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh
 RUN bash /install/ubuntu_install_opengl.sh
 
+COPY install/ubuntu_install_opencl.sh /install/ubuntu_install_opencl.sh
+RUN bash /install/ubuntu_install_opencl.sh
+
 # Enable doxygen for c++ doc build
-RUN apt-get install -y doxygen graphviz
+RUN apt-get update && apt-get install -y doxygen graphviz
 
 # Install vulkan
 COPY install/ubuntu_install_vulkan.sh /install/ubuntu_install_vulkan.sh
diff --git a/tests/ci_build/install/ubuntu_install_core.sh b/tests/ci_build/install/ubuntu_install_core.sh
index 9823ae0788ac..efc69c946b97 100644
--- a/tests/ci_build/install/ubuntu_install_core.sh
+++ b/tests/ci_build/install/ubuntu_install_core.sh
@@ -1,5 +1,5 @@
 # install libraries for building c++ core on ubuntu
-apt-get install -y --no-install-recommends --force-yes \
+apt-get update && apt-get install -y --no-install-recommends --force-yes \
         git make libgtest-dev cmake wget unzip libtinfo-dev libz-dev\
         libcurl4-openssl-dev libopenblas-dev g++ sudo
 
diff --git a/tests/ci_build/install/ubuntu_install_opencl.sh b/tests/ci_build/install/ubuntu_install_opencl.sh
index 636236539a98..ca4d1d04fd5c 100644
--- a/tests/ci_build/install/ubuntu_install_opencl.sh
+++ b/tests/ci_build/install/ubuntu_install_opencl.sh
@@ -1,8 +1,8 @@
 # Install OpenCL runtime in nvidia docker.
-apt-get install -y --no-install-recommends --force-yes \
-        ocl-icd-libopencl1 \
+apt-get update && apt-get install -y --no-install-recommends --force-yes \
+        ocl-icd-opencl-dev \
         clinfo && \
-        rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/*
 
 mkdir -p /etc/OpenCL/vendors && \
     echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
diff --git a/tutorials/deployment/cross_compilation_and_rpc.py b/tutorials/deployment/cross_compilation_and_rpc.py
index 57f0816e162d..f06bbfca6407 100644
--- a/tutorials/deployment/cross_compilation_and_rpc.py
+++ b/tutorials/deployment/cross_compilation_and_rpc.py
@@ -239,47 +239,52 @@
 #    But here we set 'llvm' to enable this tutorial to run locally.
 #
 #    Also we need to build the runtime with the flag `USE_OPENCL=1`.
-
 # build kernel (different from cpu, we need bind axis for OpenCL)
-s = tvm.create_schedule(B.op)
-xo, xi = s[B].split(B.op.axis[0], factor=32)
-s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
-s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
-f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")
+#
+# The following function shows how we can deploy CL
+def deploy_cl():  # build "myadd" for OpenCL, upload it via `remote`, run it and check the result
+    s = tvm.create_schedule(B.op)
+    xo, xi = s[B].split(B.op.axis[0], factor=32)  # split the first axis into outer/inner parts (factor 32)
+    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
+    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
+    f = tvm.build(s, [A, B], "opencl", target_host="llvm", name="myadd")  # target "opencl", target_host "llvm"
 
-# save files
-path_o = temp.relpath("myadd.o")
-path_cl = temp.relpath("myadd.cl")
-path_json = temp.relpath("myadd.tvm_meta.json")
-f.save(path_o)
-f.imported_modules[0].save(path_cl)
+    # save files
+    path_o = temp.relpath("myadd.o")
+    path_cl = temp.relpath("myadd.cl")
+    path_json = temp.relpath("myadd.tvm_meta.json")  # NOTE(review): never saved explicitly here; presumably written by a save() call below - confirm
+    f.save(path_o)
+    f.imported_modules[0].save(path_cl)  # presumably the OpenCL device module - confirm
 
-# upload files
-remote.upload(path_o)
-remote.upload(path_cl)
-remote.upload(path_json)
+    # upload files
+    remote.upload(path_o)
+    remote.upload(path_cl)
+    remote.upload(path_json)
 
-# load files on remote device
-fhost = remote.load_module("myadd.o")
-fdev = remote.load_module("myadd.cl")
-fhost.import_module(fdev)
+    # load files on remote device
+    fhost = remote.load_module("myadd.o")
+    fdev = remote.load_module("myadd.cl")
+    fhost.import_module(fdev)  # attach the module loaded from myadd.cl to the one loaded from myadd.o
+
+    # run
+    ctx = remote.cl(0)
+    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
+    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
+    fhost(a, b)
+    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)  # the kernel is expected to produce b == a + 1
 
-# run
-ctx = remote.cl(0)
-a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
-b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
-fhost(a, b)
-np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
 
 #####################################################################
 # Instead of uploading files separately, there is a more convinient way.
 # You can export libraray as a tar ball.
-path_tar = temp.relpath("myadd.tar")
-f.export_library(path_tar)
-remote.upload(path_tar)
-fhost = remote.load_module("myadd.tar")
-fhost(a, b)
-np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
+# The following function shows how we can deploy by tar ball
+def deploy_cl_by_tar():  # NOTE(review): f, a and b are not defined in this function; deploy_cl()'s locals are not visible here - confirm they exist at module level
+    path_tar = temp.relpath("myadd.tar")
+    f.export_library(path_tar)  # export the library as a single tar ball
+    remote.upload(path_tar)
+    fhost = remote.load_module("myadd.tar")  # loaded on the remote by file name, not by local path
+    fhost(a, b)
+    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)  # expects b == a + 1
 
 # terminate the server after experiment
 server.terminate()