Oneflow-Inc · hjchen2 · Sep 18, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 13, 2022
diff --git a/oneflow/api/python/framework/autocast.cpp b/oneflow/api/python/framework/autocast.cpp
@@ -0,0 +1,90 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include <pybind11/pybind11.h>
+#include "oneflow/api/python/of_api_registry.h"
+
+#include "oneflow/core/common/throw.h"
+#include "oneflow/core/framework/autocast.h"
+
+namespace py = pybind11;
+
+namespace oneflow {
+
+// Scoped guard for the autocast configuration of the calling thread: the constructor records the
+// current settings and installs the requested ones, the destructor puts the recorded ones back.
+class AutoCastMode {
+ public:
+  OF_DISALLOW_COPY_AND_MOVE(AutoCastMode);
+
+  // device_type: "cpu" or "cuda"; anything else raises RuntimeError and changes nothing.
+  // dtype: installed both as the generic autocast dtype and as the dtype of the chosen device.
+  // enabled / cache_enabled: new values for the autocast flag and the autocast-cache flag.
+  AutoCastMode(const std::string& device_type, Symbol<DType> dtype, bool enabled,
+               bool cache_enabled)
+      : prev_enabled_(autocast::is_enabled()),
+        prev_cache_enabled_(autocast::is_autocast_cache_enabled()),
+        prev_device_type_(autocast::get_autocast_device_type()),
+        prev_dtype_(autocast::get_autocast_dtype()),
+        prev_gpu_dtype_(autocast::get_autocast_gpu_dtype()),
+        prev_cpu_dtype_(autocast::get_autocast_cpu_dtype()) {
+    // Validate before mutating anything: when a constructor throws, the destructor never runs,
+    // so any state changed ahead of the throw would not be restored.
+    const bool is_cpu = (device_type == "cpu");
+    if (!is_cpu && device_type != "cuda") {
+      THROW(RuntimeError) << "User specified autocast device_type must be 'cuda' or 'cpu'";
+    }
+    // update autocast state
+    autocast::set_enabled(enabled);
+    autocast::set_autocast_cache_enabled(cache_enabled);
+    autocast::set_autocast_dtype(dtype);
+    if (is_cpu) {
+      autocast::set_autocast_device_type(kCPU);
+      autocast::set_autocast_cpu_dtype(dtype);
+    } else {
+      autocast::set_autocast_device_type(kCUDA);
+      autocast::set_autocast_gpu_dtype(dtype);
+    }
+  }
+
+  // Restores every setting captured by the constructor.
+  ~AutoCastMode() {
+    autocast::set_enabled(prev_enabled_);
+    autocast::set_autocast_cache_enabled(prev_cache_enabled_);
+    autocast::set_autocast_device_type(prev_device_type_);
+    autocast::set_autocast_dtype(prev_dtype_);
+    autocast::set_autocast_gpu_dtype(prev_gpu_dtype_);
+    autocast::set_autocast_cpu_dtype(prev_cpu_dtype_);
+  }
+
+ private:
+  // Settings that were in effect when this guard was constructed.
+  bool prev_enabled_;
+  bool prev_cache_enabled_;
+  DeviceType prev_device_type_;
+  Symbol<DType> prev_dtype_;
+  Symbol<DType> prev_gpu_dtype_;
+  Symbol<DType> prev_cpu_dtype_;
+};
+
+// Python bindings for the autocast state: the AutoCastMode guard class plus free functions that
+// read and write the individual settings.
+ONEFLOW_API_PYBIND11_MODULE("", m) {
+  // AutoCastMode(device_type, dtype, enabled, cache_enabled), held by shared_ptr.
+  py::class_<AutoCastMode, std::shared_ptr<AutoCastMode>> autocast_mode_class(m, "AutoCastMode");
+  autocast_mode_class.def(py::init(
+      [](const std::string& device_type, Symbol<DType> dtype, bool enabled, bool cache_enabled) {
+        return std::make_shared<AutoCastMode>(device_type, dtype, enabled, cache_enabled);
+      }));
+
+  // Autocast on/off flag.
+  m.def("is_autocast_enabled", &autocast::is_enabled);
+  m.def("set_autocast_enabled", &autocast::set_enabled);
+
+  // Per-device low-precision dtypes.
+  m.def("get_autocast_gpu_dtype", &autocast::get_autocast_gpu_dtype);
+  m.def("get_autocast_cpu_dtype", &autocast::get_autocast_cpu_dtype);
+  m.def("set_autocast_gpu_dtype", &autocast::set_autocast_gpu_dtype);
+  m.def("set_autocast_cpu_dtype", &autocast::set_autocast_cpu_dtype);
+
+  // Autocast cache controls.
+  m.def("is_autocast_cache_enabled", &autocast::is_autocast_cache_enabled);
+  m.def("set_autocast_cache_enabled", &autocast::set_autocast_cache_enabled);
+  m.def("clear_autocast_cache", &autocast::clear_cache);
+}
+
+}  // namespace oneflow
@@ -0,0 +1,149 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#include "oneflow/core/common/throw.h"
+#include "oneflow/core/framework/autocast.h"
+#include "oneflow/core/job_rewriter/auto_mixed_precision.h"
+#include "oneflow/core/job_rewriter/auto_mixed_precision_lists.h"
+
+namespace oneflow {
+namespace autocast {
+
+namespace {
+
+// Each helper below owns one function-local thread_local setting and returns a pointer to it,
+// so the public getters and setters of this file can read and overwrite the value.
+
+// Autocast on/off flag; initially false.
+bool* autocast_enabled() {
+  static thread_local bool enabled_flag = false;
+  return &enabled_flag;
+}
+
+// Device type autocast applies to; initially kCUDA.
+DeviceType* autocast_device_type() {
+  static thread_local DeviceType current_device_type = kCUDA;
+  return &current_device_type;
+}
+
+// Generic autocast dtype; initially float16.
+Symbol<DType>* autocast_dtype() {
+  static thread_local Symbol<DType> current_dtype = DType::Float16();
+  return &current_dtype;
+}
+
+// Autocast dtype for CPU; initially bfloat16.
+Symbol<DType>* autocast_cpu_dtype() {
+  static thread_local Symbol<DType> cpu_dtype = DType::BFloat16();
+  return &cpu_dtype;
+}
+
+// Autocast dtype for GPU; initially float16.
+Symbol<DType>* autocast_gpu_dtype() {
+  static thread_local Symbol<DType> gpu_dtype = DType::Float16();
+  return &gpu_dtype;
+}
+
+// Autocast cache on/off flag; initially true.
+bool* cache_enabled() {
+  static thread_local bool cache_flag = true;
+  return &cache_flag;
+}
+
+}  // namespace
+
+// Reads the calling thread's autocast on/off flag.
+bool is_enabled() {
+  const bool* flag = autocast_enabled();
+  return *flag;
+}
+// Overwrites the calling thread's autocast on/off flag.
+void set_enabled(bool enabled) {
+  bool* flag = autocast_enabled();
+  *flag = enabled;
+}
+
+// Reads the calling thread's autocast device type.
+DeviceType get_autocast_device_type() {
+  const DeviceType* slot = autocast_device_type();
+  return *slot;
+}
+// Overwrites the calling thread's autocast device type.
+void set_autocast_device_type(DeviceType device_type) {
+  DeviceType* slot = autocast_device_type();
+  *slot = device_type;
+}
+
+// Getters for the generic, CPU and GPU autocast dtypes of the calling thread.
+Symbol<DType> get_autocast_dtype() {
+  const Symbol<DType>* slot = autocast_dtype();
+  return *slot;
+}
+Symbol<DType> get_autocast_cpu_dtype() {
+  const Symbol<DType>* slot = autocast_cpu_dtype();
+  return *slot;
+}
+Symbol<DType> get_autocast_gpu_dtype() {
+  const Symbol<DType>* slot = autocast_gpu_dtype();
+  return *slot;
+}
+
+// Setters for the generic, CPU and GPU autocast dtypes of the calling thread.
+void set_autocast_dtype(Symbol<DType> dtype) {
+  Symbol<DType>* slot = autocast_dtype();
+  *slot = dtype;
+}
+void set_autocast_cpu_dtype(Symbol<DType> dtype) {
+  Symbol<DType>* slot = autocast_cpu_dtype();
+  *slot = dtype;
+}
+void set_autocast_gpu_dtype(Symbol<DType> dtype) {
+  Symbol<DType>* slot = autocast_gpu_dtype();
+  *slot = dtype;
+}
+
+// Reads the calling thread's autocast-cache flag.
+bool is_autocast_cache_enabled() {
+  const bool* flag = cache_enabled();
+  return *flag;
+}
+// Overwrites the calling thread's autocast-cache flag.
+void set_autocast_cache_enabled(bool enabled) {
+  bool* flag = cache_enabled();
+  *flag = enabled;
+}
+// Placeholder: clearing the cache is not implemented yet, so this does nothing.
+void clear_cache() {
+  // TODO(hjchen2)
+}
+
+// Returns the color currently stored for this op.
+AutoCastColor AutoCastMeta::autocast_color() const {
+  return this->autocast_color_;
+}
+
+// Stores `color` as this op's color.
+void AutoCastMeta::set_autocast_color(AutoCastColor color) {
+  this->autocast_color_ = color;
+}
+
+// Looks up the eligibility table at [device type][data type]. Any position that lies outside
+// the table, which only grows through set_autocast_eligible, counts as not eligible.
+bool AutoCastMeta::is_autocast_eligible(DeviceType device_type, Symbol<DType> dtype) const {
+  const int device_slot = static_cast<int>(device_type);
+  if (is_autocast_eligible_.size() <= device_slot) { return false; }
+
+  const auto& dtype_flags = is_autocast_eligible_[device_slot];
+  const int dtype_slot = static_cast<int>(dtype->data_type());
+  if (dtype_flags.size() <= dtype_slot) { return false; }
+
+  return dtype_flags[dtype_slot];
+}
+
+// Marks the (device type, data type) pair as eligible, growing the table rows/columns on demand.
+// Both enum values are used as vector indices and must therefore be non-negative; a negative
+// value raises instead of being compared against an unsigned size.
+void AutoCastMeta::set_autocast_eligible(DeviceType device_type, Symbol<DType> dtype) {
+  const int device_index = static_cast<int>(device_type);
+  const int dtype_index = static_cast<int>(dtype->data_type());
+  CHECK_GE_OR_THROW(device_index, 0)
+      << "device type index must be non-negative, but got " << device_index;
+  CHECK_GE_OR_THROW(dtype_index, 0)
+      << "data type index must be non-negative, but got " << dtype_index;
+
+  const size_t device_slot = static_cast<size_t>(device_index);
+  const size_t dtype_slot = static_cast<size_t>(dtype_index);
+  // A single resize is enough to make the slot addressable; no loop is needed.
+  if (is_autocast_eligible_.size() <= device_slot) {
+    is_autocast_eligible_.resize(device_slot + 1);
+  }
+  auto& dtype_flags = is_autocast_eligible_[device_slot];
+  if (dtype_flags.size() <= dtype_slot) { dtype_flags.resize(dtype_slot + 1); }
+  dtype_flags[dtype_slot] = true;
+}
+
+// Returns the eligibility flag of input argument `arg_index`; the index is checked against the
+// number of arguments first.
+bool AutoCastMeta::is_args_autocast_eligible(int arg_index) const {
+  const auto& arg_flags = is_args_autocast_eligible_;
+  CHECK_LT_OR_THROW(arg_index, arg_flags.size());  // NOLINT
+  return arg_flags[arg_index];
+}
+
+// Returns the eligibility flags of all input arguments.
+const std::vector<bool>& AutoCastMeta::is_args_autocast_eligible() const {
+  const auto& arg_flags = is_args_autocast_eligible_;
+  return arg_flags;
+}
+
+// Marks input argument `arg_index` as eligible; the index is checked against the number of
+// arguments first.
+void AutoCastMeta::set_arg_autocast_eligible(int arg_index) {
+  auto& arg_flags = is_args_autocast_eligible_;
+  CHECK_LT_OR_THROW(arg_index, arg_flags.size());  // NOLINT
+  arg_flags[arg_index] = true;
+}
+
+// Builds the autocast metadata of op `op_type_name`:
+//   - color: white / gray / clear according to the first AMP list containing the op, else black;
+//   - per-argument flags: set for every input that amp::IsNoCast does not exclude;
+//   - (device, dtype) eligibility: filled in only for non-black ops.
+std::shared_ptr<AutoCastMeta> MakeAutoCastMeta(
+    const std::string& op_type_name,
+    const std::vector<std::pair<std::string, int32_t>>& input_args) {
+  auto meta = std::make_shared<AutoCastMeta>(input_args.size());
+
+  AutoCastColor color = kBlack;
+  if (AutoMixedPrecisionLists::WhiteList().count(op_type_name)) {
+    color = kWhite;
+  } else if (AutoMixedPrecisionLists::GrayList().count(op_type_name)) {
+    color = kGray;
+  } else if (AutoMixedPrecisionLists::ClearList().count(op_type_name)) {
+    color = kClear;
+  }
+  meta->set_autocast_color(color);
+
+  int arg_position = 0;
+  for (const auto& input_arg : input_args) {
+    if (!amp::IsNoCast(op_type_name, input_arg)) { meta->set_arg_autocast_eligible(arg_position); }
+    ++arg_position;
+  }
+
+  // autocast only supports the following device type(s) and low precision type(s):
+  //   - device type: CUDA
+  //   - low precision type: half, bfloat16
+  static std::vector<DeviceType> supported_device_types{kCUDA};
+  static std::vector<Symbol<DType>> supported_dtypes{DType::Float16(), DType::BFloat16()};
+
+  // Black ops are never autocast, so their eligibility table stays empty.
+  if (meta->autocast_color() == kBlack) { return meta; }
+
+  for (const auto& supported_device : supported_device_types) {
+    for (const auto& supported_dtype : supported_dtypes) {
+      meta->set_autocast_eligible(supported_device, supported_dtype);
+    }
+  }
+  return meta;
+}
+
+}  // namespace autocast
+}  // namespace oneflow
@@ -0,0 +1,75 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+#ifndef ONEFLOW_CORE_FRAMEWORK_AUTOCAST_H_
+#define ONEFLOW_CORE_FRAMEWORK_AUTOCAST_H_
+
+#include "oneflow/core/framework/device.h"
+#include "oneflow/core/framework/dtype.h"
+
+namespace oneflow {
+namespace autocast {
+
+bool is_enabled();  // autocast on/off flag of the calling thread (initially false)
+void set_enabled(bool enabled);  // overwrites that flag
+
+DeviceType get_autocast_device_type();  // device type autocast applies to (initially kCUDA)
+void set_autocast_device_type(DeviceType device_type);  // overwrites that device type
+
+Symbol<DType> get_autocast_dtype();  // generic autocast dtype (initially float16)
+Symbol<DType> get_autocast_cpu_dtype();  // CPU autocast dtype (initially bfloat16)
+Symbol<DType> get_autocast_gpu_dtype();  // GPU autocast dtype (initially float16)
+
+void set_autocast_dtype(Symbol<DType> dtype);  // overwrites the generic autocast dtype
+void set_autocast_cpu_dtype(Symbol<DType> dtype);  // overwrites the CPU autocast dtype
+void set_autocast_gpu_dtype(Symbol<DType> dtype);  // overwrites the GPU autocast dtype
+
+bool is_autocast_cache_enabled();  // autocast-cache flag of the calling thread (initially true)
+void set_autocast_cache_enabled(bool enabled);  // overwrites that flag
+void clear_cache();  // currently a no-op; the implementation is still a TODO
+
+enum AutoCastColor { kWhite, kGray, kClear, kBlack };  // AMP white/gray/clear list, else kBlack
+
+// Autocast metadata of one op: its color, a table of eligible (device type, data type) pairs
+// and one eligibility flag per input argument.
+class AutoCastMeta final {
+ public:
+  // Metadata for an op with `args_num` inputs: color kBlack, every argument flag false.
+  explicit AutoCastMeta(int args_num) : is_args_autocast_eligible_(args_num, false) {}
+  // Same as AutoCastMeta(0).
+  AutoCastMeta() : AutoCastMeta(0) {}
+
+  // Queries.
+  AutoCastColor autocast_color() const;
+  bool is_autocast_eligible(DeviceType device_type, Symbol<DType> dtype) const;
+  bool is_args_autocast_eligible(int arg_index) const;
+  const std::vector<bool>& is_args_autocast_eligible() const;
+
+  // Mutators; the two eligibility setters can only switch a flag on.
+  void set_autocast_color(AutoCastColor color);
+  void set_autocast_eligible(DeviceType device_type, Symbol<DType> dtype);
+  void set_arg_autocast_eligible(int arg_index);
+
+ private:
+  AutoCastColor autocast_color_ = kBlack;
+  // Indexed as [device type][data type].
+  std::vector<std::vector<bool>> is_autocast_eligible_;
+  // One flag per input argument, sized at construction.
+  std::vector<bool> is_args_autocast_eligible_;
+};
+
+std::shared_ptr<AutoCastMeta> MakeAutoCastMeta(
+    const std::string& op_type_name,
+    const std::vector<std::pair<std::string, int32_t>>& input_args);  // builds the metadata of op `op_type_name`, with one argument flag per entry of `input_args`
+
+}  // namespace autocast
+}  // namespace oneflow
+
+#endif  // ONEFLOW_CORE_FRAMEWORK_AUTOCAST_H_