consistent_tensor_to_api #5703

Merged: 10 commits, Aug 4, 2021
22 changes: 22 additions & 0 deletions oneflow/api/python/symbol/placement_symbol.cpp
@@ -19,6 +19,7 @@ limitations under the License.
#include "oneflow/api/python/of_api_registry.h"
#include "oneflow/core/control/global_process_ctx.h"
#include "oneflow/core/common/symbol.h"
#include "oneflow/core/common/container_util.h"
#include "oneflow/core/framework/instructions_builder.h"
#include "oneflow/core/framework/parallel_conf_util.h"
#include "oneflow/core/job/parallel_desc.h"
@@ -192,6 +193,26 @@ struct PlacementSymbolExportUtil {
+ ", hierarchy=" + hierarchy + ")";
return placement_str;
}

static Maybe<Symbol<ParallelDesc>> ReplacePlacementDeviceTag(Symbol<ParallelDesc> parallel_desc,
const std::string& device_type) {
static const HashMap<std::string, std::string> type2device_tag{{"cpu", "cpu"}, {"cuda", "gpu"}};
std::shared_ptr<cfg::ParallelConf> parallel_conf =
std::make_shared<cfg::ParallelConf>(*parallel_desc->cfg_parallel_conf());
parallel_conf->set_device_tag(JUST(MapAt(type2device_tag, device_type)));
std::shared_ptr<ParallelDesc> out_parallel_desc;
JUST(LogicalRun(
[&out_parallel_desc, &parallel_conf](InstructionsBuilder* builder) -> Maybe<void> {
out_parallel_desc = JUST(builder->GetParallelDescSymbol(parallel_conf));
return Maybe<void>::Ok();
}));
return SymbolOf(*out_parallel_desc);
}

static Symbol<ParallelDesc> ApiReplacePlacementDeviceTag(Symbol<ParallelDesc> parallel_desc,
const std::string& device_type) {
return ReplacePlacementDeviceTag(parallel_desc, device_type).GetOrThrow();
}
};

} // namespace
@@ -244,6 +265,7 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
      .def(py::self == py::self)
      .def(py::hash(py::self));
  m.def("AllDevicePlacement", &PlacementSymbolExportUtil::AllDevicePlacement);
  m.def("_ReplacePlacementDeviceTag", &PlacementSymbolExportUtil::ApiReplacePlacementDeviceTag);
}

} // namespace oneflow
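The pybind registration above exposes the device-tag swap to Python as flow._oneflow_internal._ReplacePlacementDeviceTag. A minimal sketch of calling it directly, assuming a flow.placement(device_type, {node: device_ids}) constructor of this era (the constructor arguments are illustrative, not part of this diff):

import oneflow as flow

# Assumed placement over devices 0-1 on node 0; the exact flow.placement(...)
# arguments here are a guess for illustration.
cpu_placement = flow.placement("cpu", {0: [0, 1]})

# The new binding returns a ParallelDesc symbol with the device tag rewritten
# per type2device_tag ("cuda" maps to the internal tag "gpu"); the rank
# hierarchy is unchanged.
cuda_placement = flow._oneflow_internal._ReplacePlacementDeviceTag(cpu_placement, "cuda")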
29 changes: 29 additions & 0 deletions python/oneflow/nn/modules/to.py
@@ -36,6 +36,23 @@ def forward(self, x, device, dtype):
        return result


def ConsistentTo(input, device):
    assert device in (
        "cuda",
        "cpu",
    ), 'consistent tensor only supports to("cuda") or to("cpu")'
    if device == input.placement.device_type:
        return input
    out_placement = flow._oneflow_internal._ReplacePlacementDeviceTag(
        input.placement, device
    )
    sbp = input.sbp
    input_local_tensor = input.to_local()
    device = flow.device(device)
    output_local_tensor = To(False)(input_local_tensor, device, None)
    return output_local_tensor.to_consistent(out_placement, sbp)


@register_tensor_op("to")
def to_op(input, *args, **kwargs):
    """Performs Tensor dtype and/or device conversion.
@@ -71,6 +88,18 @@ def to_op(input, *args, **kwargs):
    copy = kwargs.get("copy", False)
    device = kwargs.get("device", None)
    dtype = kwargs.get("dtype", None)
    if input.is_consistent:
        input.check_meta_consistency()
        if len(args) > 0:
            assert args[0] in (
                "cuda",
                "cpu",
            ), 'consistent tensor only supports to("cuda") or to("cpu")'
            return ConsistentTo(input, args[0])
        if device in ("cuda", "cpu"):
            return ConsistentTo(input, device)
        raise TypeError("to() received an invalid combination of arguments")

    if len(args) > 0:
        if isinstance(args[0], flow.Tensor):
            if len(args) == 2:
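Putting the two pieces together, to("cuda") / to("cpu") on a consistent tensor round-trips through a local tensor: to_local(), a local device move via To(False), then to_consistent() with the rewritten placement and the original sbp. A usage sketch, assuming flow.ones accepts placement/sbp keyword arguments in this version:

import oneflow as flow

placement = flow.placement("cpu", {0: [0]})  # constructor form assumed
x = flow.ones((2, 3), placement=placement, sbp=flow.sbp.broadcast)

y = x.to("cuda")  # placement device tag rewritten; sbp carried over unchanged
z = x.to("cpu")   # device_type already matches, so the input is returned as-is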