diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 022454afd..f145d8425 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -4,7 +4,7 @@ steps: command: bash .buildkite/scripts/benchmark_master.sh plugins: - docker#v3.8.0: - image: "baguasys/bagua:latest" + image: "baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8" workdir: /upstream user: root propagate-environment: true @@ -20,7 +20,7 @@ steps: command: bash .buildkite/scripts/benchmark_worker.sh plugins: - docker#v3.8.0: - image: "baguasys/bagua:latest" + image: "baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8" workdir: /upstream user: root propagate-environment: true @@ -34,7 +34,7 @@ steps: command: bash .buildkite/scripts/benchmark.sh plugins: - docker#v3.8.0: - image: "baguasys/bagua:latest" + image: "baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8" workdir: /upstream user: root propagate-environment: true @@ -48,7 +48,7 @@ steps: command: bash .buildkite/scripts/run_pytest.sh plugins: - docker#v3.8.0: - image: "baguasys/bagua:latest" + image: "baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8" workdir: /upstream user: root propagate-environment: true diff --git a/.github/workflows/bagua-pypi-publish.yml b/.github/workflows/bagua-pypi-publish.yml index add242789..bc86f6a95 100644 --- a/.github/workflows/bagua-pypi-publish.yml +++ b/.github/workflows/bagua-pypi-publish.yml @@ -9,7 +9,7 @@ concurrency: jobs: publish: runs-on: ubuntu-latest - container: baguasys/bagua:latest + container: baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8 steps: - uses: actions/checkout@v2 with: @@ -38,7 +38,7 @@ jobs: check_source_install: runs-on: ubuntu-latest - container: baguasys/bagua:latest + container: baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8 needs: - publish steps: @@ -115,7 +115,6 @@ jobs: with: fetch-depth: 0 submodules: recursive - - name: setup python uses: actions/setup-python@v2 with: diff --git a/.github/workflows/bagua-python-package-check.yml 
b/.github/workflows/bagua-python-package-check.yml index b38746129..01d753a9d 100644 --- a/.github/workflows/bagua-python-package-check.yml +++ b/.github/workflows/bagua-python-package-check.yml @@ -12,7 +12,7 @@ on: jobs: build: runs-on: ubuntu-latest - container: baguasys/bagua:latest + container: baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8 steps: - uses: actions/checkout@v2 with: diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/dockerhub.yml index a2d9e3340..f96077819 100644 --- a/.github/workflows/dockerhub.yml +++ b/.github/workflows/dockerhub.yml @@ -17,6 +17,9 @@ jobs: - cuda-version: "cuda11.1" cudnn-version: "cudnn8" pytorch-version: "pytorch-1.9.0" + - cuda-version: "cuda11.3" + cudnn-version: "cudnn8" + pytorch-version: "pytorch-1.10.0" name: 'Build' runs-on: ubuntu-latest steps: diff --git a/.github/workflows/pytype.yml b/.github/workflows/pytype.yml index 816b61439..271314065 100644 --- a/.github/workflows/pytype.yml +++ b/.github/workflows/pytype.yml @@ -19,7 +19,7 @@ jobs: build: # The type of runner that the job will run on runs-on: ubuntu-latest - container: baguasys/bagua:latest + container: baguasys/bagua:master-pytorch-1.9.0-cuda11.1-cudnn8 # Steps represent a sequence of tasks that will be executed as part of the job steps: diff --git a/bagua/torch_api/algorithms/decentralized.py b/bagua/torch_api/algorithms/decentralized.py index fc2130392..317781786 100644 --- a/bagua/torch_api/algorithms/decentralized.py +++ b/bagua/torch_api/algorithms/decentralized.py @@ -87,7 +87,7 @@ def hook(): def _init_states(self, bucket: BaguaBucket): weight_tensor = bucket.flattened_tensor() - bucket._peer_weight = weight_tensor.to_bagua_tensor("peer_weight") + bucket._peer_weight = weight_tensor.ensure_bagua_tensor("peer_weight") def init_operations( self, @@ -182,11 +182,11 @@ def _init_states(self, bucket: BaguaBucket): left_peer_weight_tensor = bucket.flattened_tensor() right_peer_weight_tensor = bucket.flattened_tensor() - 
bucket._weight = weight_tensor.to_bagua_tensor("weight") - bucket._left_peer_weight = left_peer_weight_tensor.to_bagua_tensor( + bucket._weight = weight_tensor.ensure_bagua_tensor("weight") + bucket._left_peer_weight = left_peer_weight_tensor.ensure_bagua_tensor( "left_peer_weight" ) - bucket._right_peer_weight = right_peer_weight_tensor.to_bagua_tensor( + bucket._right_peer_weight = right_peer_weight_tensor.ensure_bagua_tensor( "right_peer_weight" ) diff --git a/bagua/torch_api/bucket.py b/bagua/torch_api/bucket.py index cec70b29f..c04ddb4ac 100644 --- a/bagua/torch_api/bucket.py +++ b/bagua/torch_api/bucket.py @@ -55,7 +55,7 @@ def __init__( # padding tensor must be of name bagua_padding_tensor, so that they are always marked as ready for communication in the backend self.padding_tensor = torch.zeros( padding, dtype=self.tensors[0].dtype, device=self.tensors[0].device - ).to_bagua_tensor( + ).ensure_bagua_tensor( "bagua_padding_tensor_bucket_" + name, module_name=self.bagua_module_name, ) @@ -243,7 +243,7 @@ def append_decentralized_synchronous_op( Args: peer_weight (BaguaTensor): A tensor used for averaging model with peers, should be of the same size - with the bucket tensors total size. Use ``self.flattened_tensor().to_bagua_tensor(...)`` to create such a tensor. + with the bucket tensors total size. Use ``self.flattened_tensor().ensure_bagua_tensor(...)`` to create such a tensor. hierarchical (bool): Enable hierarchical communication. Which means the GPUs on the same machine will communicate will each other first. After that, machines do inter-node communication. This can boost performance when the inter-node communication cost is high. @@ -292,12 +292,12 @@ def append_low_precision_decentralized_synchronous_op( Args: weight (BaguaTensor): Model replica of current worker's local model. It should be of the same size - with the bucket tensors total size. Use ``self.flattened_tensor().to_bagua_tensor(...)`` to create such a tensor. 
+ with the bucket tensors total size. Use ``self.flattened_tensor().ensure_bagua_tensor(...)`` to create such a tensor. left_peer_weight (BaguaTensor): Model replica of current worker's left peer. It should be of the same size - with the bucket tensors total size. Use ``self.flattened_tensor().to_bagua_tensor(...)`` to create such a tensor, + with the bucket tensors total size. Use ``self.flattened_tensor().ensure_bagua_tensor(...)`` to create such a tensor, then copy the initializing weights of current worker's left peer to the tensor. right_peer_weight (BaguaTensor): Model replica of current worker's right peer. It should be of the same size - with the bucket tensors total size. Use ``self.flattened_tensor().to_bagua_tensor(...)`` to create such a tensor. + with the bucket tensors total size. Use ``self.flattened_tensor().ensure_bagua_tensor(...)`` to create such a tensor, then copy the initializing weights of current worker's right peer to the tensor. hierarchical (bool): Enable hierarchical communication. Which means the GPUs on the same machine will communicate will each other first. After that, machines do inter-node communication. This can diff --git a/bagua/torch_api/tensor.py b/bagua/torch_api/tensor.py index be436fc81..b31d63c70 100644 --- a/bagua/torch_api/tensor.py +++ b/bagua/torch_api/tensor.py @@ -14,7 +14,7 @@ class BaguaTensor: with additional methods. A Bagua tensor is required to use Bagua's communication algorithms. Users can convert a PyTorch tensor to Bagua - tensor by :meth:`ensure_bagua_tensor` or :meth:`to_bagua_tensor`. + tensor by :meth:`ensure_bagua_tensor`. Bagua tensor features a proxy structure, where the actual tensor used by backend is accessed via a **"Proxy Tensor"**.
The proxy tensor is registered in Bagua, whenever the Bagua backend needs a tensor (for example use it for @@ -134,24 +134,6 @@ def ensure_bagua_tensor( self._bagua_bucket = None return self - def bagua_getter_closure(self) -> torch.Tensor: - """Returns the tensor that will be used in runtime.""" - return ( - self._bagua_getter_closure(self) - if self._bagua_getter_closure is not None - else self - ) - - def bagua_setter_closure(self, tensor: torch.Tensor): - """Sets the tensor that will be used in runtime to a new Pytorch tensor :attr:`tensor`. - - Args: - tensor: The new tensor to be set to. - """ - - assert self._bagua_setter_closure is not None - self._bagua_setter_closure(self, tensor) - def to_bagua_tensor( self, name: Optional[str] = None, @@ -161,9 +143,13 @@ def to_bagua_tensor( ): """ Create a new Bagua tensor from a PyTorch tensor or parameter and return it. - The original tensor is not changed. A Bagua tensor is required to use - Bagua's communication algorithms. See :meth:`ensure_bagua_tensor` for more - information. + The new Bagua tensor will share the same storage with the input PyTorch tensor. + A Bagua tensor is required to use Bagua's communication algorithms. + See :meth:`ensure_bagua_tensor` for more information. + + Caveat: Be aware that if the original tensor changes to use a different storage + using for example ``torch.Tensor.set_(...)``, the new Bagua tensor will still + use the old storage. Args: name: The unique name of the tensor. @@ -173,15 +159,32 @@ def to_bagua_tensor( getter_closure: A function that accepts a Pytorch tensor as its input and returns a Pytorch tensor as its output. See :meth:`ensure_bagua_tensor`. setter_closure: A function that accepts two Pytorch tensors as its inputs and returns nothing. See :meth:`ensure_bagua_tensor`. - Returns: The new Bagua tensor sharing the same storage with the original tensor. 
""" - new_tensor = torch.Tensor(cdata=self._cdata) + new_tensor = self.view(self.dtype) return new_tensor.ensure_bagua_tensor( name, module_name, getter_closure, setter_closure ) + def bagua_getter_closure(self) -> torch.Tensor: + """Returns the tensor that will be used in runtime.""" + return ( + self._bagua_getter_closure(self) + if self._bagua_getter_closure is not None + else self + ) + + def bagua_setter_closure(self, tensor: torch.Tensor): + """Sets the tensor that will be used in runtime to a new Pytorch tensor :attr:`tensor`. + + Args: + tensor: The new tensor to be set to. + """ + + assert self._bagua_setter_closure is not None + self._bagua_setter_closure(self, tensor) + def bagua_backend_tensor(self) -> B.BaguaTensorPy: """ Returns: diff --git a/docker/Dockerfile.pytorch-1.10.0-cuda11.3-cudnn8 b/docker/Dockerfile.pytorch-1.10.0-cuda11.3-cudnn8 new file mode 100644 index 000000000..b7eea817a --- /dev/null +++ b/docker/Dockerfile.pytorch-1.10.0-cuda11.3-cudnn8 @@ -0,0 +1,56 @@ +FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-devel + +RUN apt-get update && apt-get install -y curl software-properties-common wget sudo +RUN add-apt-repository ppa:git-core/ppa -y +RUN sed -i 's/mozilla\/DST_Root_CA_X3.crt/!mozilla\/DST_Root_CA_X3.crt/g' /etc/ca-certificates.conf && update-ca-certificates +RUN curl -sSf https://apt.kitware.com/kitware-archive.sh | sh +RUN apt-get update && apt-get install -y git cmake +RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain stable -y +ENV PATH=/root/.cargo/bin:${PATH} +RUN cargo install mdbook mdbook-linkcheck mdbook-katex mdbook-open-on-gh + +RUN yes | python3 -m pip install -U setuptools wheel build pip + +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib64/stubs/:/usr/local/lib64:/usr/local/lib" +ENV LIBRARY_PATH="/usr/local/cuda/lib64/stubs/:/usr/local/lib64:/usr/local/lib" +ENV PKG_CONFIG_PATH="/usr/local/cuda/pkgconfig/" +ENV CUDA_LIBRARY_PATH="/usr/local/cuda/lib64/" + + +# OpenMPI version 4.0.3 +RUN apt-get update -y && \ + 
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + bzip2 \ + file \ + hwloc \ + libnuma-dev \ + make \ + openssh-client \ + perl \ + tar \ + wget && \ + rm -rf /var/lib/apt/lists/* +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.3.tar.bz2 && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/openmpi-4.0.3.tar.bz2 -C /var/tmp -j && \ + cd /var/tmp/openmpi-4.0.3 && ./configure --disable-getpwuid --disable-oshmem --enable-fortran --enable-mca-no-build=btl-uct --enable-orterun-prefix-by-default --with-cuda --without-verbs && \ + make -j$(nproc) && \ + make -j$(nproc) install && \ + rm -rf /var/tmp/openmpi-4.0.3 /var/tmp/openmpi-4.0.3.tar.bz2 && cd - + +# hwloc +RUN mkdir -p /var/tmp && wget -q -nc --no-check-certificate -P /var/tmp https://download.open-mpi.org/release/hwloc/v2.5/hwloc-2.5.0.tar.bz2 && \ + mkdir -p /var/tmp && tar -x -f /var/tmp/hwloc-2.5.0.tar.bz2 -C /var/tmp -j && \ + cd /var/tmp/hwloc-2.5.0 && ./configure && \ + make -j$(nproc) && \ + make -j$(nproc) install && \ + rm -rf /var/tmp/hwloc* && cd - + +# Redis +RUN add-apt-repository ppa:redislabs/redis +RUN apt-get update && apt-get install -y redis +RUN yes | python3 -m pip install -U redis + +RUN mkdir /bagua +COPY examples/ /bagua/examples +COPY ./ /var/tmp/bagua +RUN cd /var/tmp/bagua && python3 -m pip install . 
&& cd - && rm -rf /var/tmp/bagua diff --git a/docker/Dockerfile.pytorch-1.9.0-cuda10.2-cudnn7 b/docker/Dockerfile.pytorch-1.9.0-cuda10.2-cudnn7 index 4524fd4d8..5a6442c83 100644 --- a/docker/Dockerfile.pytorch-1.9.0-cuda10.2-cudnn7 +++ b/docker/Dockerfile.pytorch-1.9.0-cuda10.2-cudnn7 @@ -1,6 +1,7 @@ FROM pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel -RUN apt-get update && apt-get install -y curl software-properties-common wget +RUN apt-get update && apt-get install -y curl software-properties-common wget sudo +RUN add-apt-repository ppa:git-core/ppa -y RUN sed -i 's/mozilla\/DST_Root_CA_X3.crt/!mozilla\/DST_Root_CA_X3.crt/g' /etc/ca-certificates.conf && update-ca-certificates RUN curl -sSf https://apt.kitware.com/kitware-archive.sh | sh RUN apt-get update && apt-get install -y git cmake diff --git a/docker/Dockerfile.pytorch-1.9.0-cuda11.1-cudnn8 b/docker/Dockerfile.pytorch-1.9.0-cuda11.1-cudnn8 index 10018c0fc..78f12d072 100644 --- a/docker/Dockerfile.pytorch-1.9.0-cuda11.1-cudnn8 +++ b/docker/Dockerfile.pytorch-1.9.0-cuda11.1-cudnn8 @@ -1,6 +1,7 @@ FROM pytorch/pytorch:1.9.0-cuda11.1-cudnn8-devel -RUN apt-get update && apt-get install -y curl software-properties-common wget +RUN apt-get update && apt-get install -y curl software-properties-common wget sudo +RUN add-apt-repository ppa:git-core/ppa -y RUN sed -i 's/mozilla\/DST_Root_CA_X3.crt/!mozilla\/DST_Root_CA_X3.crt/g' /etc/ca-certificates.conf && update-ca-certificates RUN curl -sSf https://apt.kitware.com/kitware-archive.sh | sh RUN apt-get update && apt-get install -y git cmake diff --git a/tests/comm/test_communicator.py b/tests/comm/test_communicator.py index 28ff1fc8d..368506924 100644 --- a/tests/comm/test_communicator.py +++ b/tests/comm/test_communicator.py @@ -56,7 +56,7 @@ def abort(): data = torch.rand(10).cuda() for _ in range(rank + 1): - comm.allreduce_inplace(data.to_bagua_tensor().bagua_backend_tensor(), 10) + comm.allreduce_inplace(data.ensure_bagua_tensor().bagua_backend_tensor(), 10) 
comm_stream.synchronize()