Realm backend #1592

Draft
wants to merge 69 commits into base: master
Changes from 1 commit
Commits
69 commits
6adb290
temporary weight adjust index
reyna-abhyankar Aug 25, 2024
61697c2
Loss function
reyna-abhyankar Aug 27, 2024
b56c046
Add cuda test for loss function
reyna-abhyankar Aug 27, 2024
f75a3d4
Format
reyna-abhyankar Aug 27, 2024
f74711f
Refactor and build optimizer kernels, op
reyna-abhyankar Aug 27, 2024
40c6252
Finish optimizer local backing
reyna-abhyankar Aug 27, 2024
ad9b9ea
Format
reyna-abhyankar Aug 27, 2024
1ddfade
E2E update test
reyna-abhyankar Aug 27, 2024
dde9496
Format
reyna-abhyankar Aug 27, 2024
59635d8
Small fixes
reyna-abhyankar Sep 11, 2024
103ef07
Format
reyna-abhyankar Sep 11, 2024
f48f9ff
Fix test and small issues
reyna-abhyankar Sep 18, 2024
189c9c8
Format
reyna-abhyankar Sep 18, 2024
d93f464
Merge branch 'repo-refactor' into local-e2e-training
reyna-abhyankar Oct 1, 2024
b5647c8
Pass tests after merge
reyna-abhyankar Oct 1, 2024
f5ff91e
Fix input/weight differentiation
reyna-abhyankar Oct 1, 2024
7470e71
Fix signature to use unified rep
reyna-abhyankar Oct 1, 2024
deece1b
Fix model training instance abstraction
reyna-abhyankar Oct 1, 2024
1d3cc94
Change subcase test name
reyna-abhyankar Oct 1, 2024
3cf5d08
Quick fixes
reyna-abhyankar Oct 16, 2024
79ef4c9
Refactor training backing and instance
reyna-abhyankar Oct 22, 2024
a73b1c3
Expose op folders publicly
reyna-abhyankar Nov 13, 2024
c6fed29
Add tensor type, operate over reduced tensor
reyna-abhyankar Nov 13, 2024
0cdfb1a
Fixes
reyna-abhyankar Jan 7, 2025
9d252b3
Remove tensor lower
reyna-abhyankar Jan 15, 2025
895c117
Add tensor and task lowering scheme
reyna-abhyankar Jan 17, 2025
66d61eb
feat: add realm-backend subdir
chenzhuofu Jan 21, 2025
8d0cfec
Merge branch 'local-e2e-training' of github.com:reyna-abhyankar/FlexF…
chenzhuofu Jan 21, 2025
411017d
Build local exec
reyna-abhyankar Jan 22, 2025
759abdd
Merge branch 'local-e2e-training' of github.com:reyna-abhyankar/FlexF…
chenzhuofu Jan 22, 2025
bcd1408
chore: duplicate some files from local-execution
chenzhuofu Jan 22, 2025
5e11568
Merge branch 'master' of github.com:flexflow/flexflow-train into real…
chenzhuofu Jan 28, 2025
1c55cf7
Merge branch 'master' of github.com:flexflow/flexflow-train into real…
chenzhuofu Jan 28, 2025
b9144ad
chore: update legion
chenzhuofu Jan 30, 2025
66647a2
feat: add legion related code
chenzhuofu Jan 30, 2025
0128abb
Disaggregate local backend
reyna-abhyankar Feb 1, 2025
277f8c2
Update task binding interface and cost estimator
reyna-abhyankar Feb 1, 2025
377c6aa
Merge master into local execution
reyna-abhyankar Feb 4, 2025
6f689a4
feat: add Future wrapper for func result
chenzhuofu Feb 5, 2025
fe2bc21
feat: add realm-backend draft impl
chenzhuofu Feb 5, 2025
8efaec7
Build
reyna-abhyankar Feb 6, 2025
1dc1398
Format
reyna-abhyankar Feb 6, 2025
17ad5c8
Split task spec files
reyna-abhyankar Feb 6, 2025
639c2c1
Delete outdated sim environment file
reyna-abhyankar Feb 6, 2025
c408ebb
Merge branch 'local-e2e-training' of github.com:reyna-abhyankar/FlexF…
chenzhuofu Feb 8, 2025
a697044
Finish API
reyna-abhyankar Feb 13, 2025
187a8d5
Add tests for allocated and unallocated
reyna-abhyankar Feb 13, 2025
a0f8113
Fix nonnegative
reyna-abhyankar Feb 13, 2025
b1eab94
Format
reyna-abhyankar Feb 13, 2025
b532c50
Pass allocated-unallocated tests
reyna-abhyankar Feb 13, 2025
f28e5c2
Update task registry tests
reyna-abhyankar Feb 13, 2025
7887183
Merge branch 'local-e2e-training' of github.com:reyna-abhyankar/FlexF…
chenzhuofu Feb 16, 2025
9c16d76
feat: intial implementation of realm-backend
chenzhuofu Feb 19, 2025
89752fa
Move local tensor backing to dtgen
reyna-abhyankar Feb 22, 2025
aef8ad5
Remove lowered tensor source
reyna-abhyankar Feb 22, 2025
f0a4285
Loss and update tests
reyna-abhyankar Feb 24, 2025
9047edc
Merge master
reyna-abhyankar Feb 24, 2025
350babf
Passing tests after merge issues
reyna-abhyankar Feb 24, 2025
aef7c6e
Pass gpu tests
reyna-abhyankar Feb 25, 2025
6c84fb3
chore: fix typo
chenzhuofu Feb 26, 2025
d6aa7ad
chore: update realm allocator impl
chenzhuofu Feb 27, 2025
419cca8
chore: eliminate std::optional<float>
chenzhuofu Mar 3, 2025
2c0b573
feat: buildable realm-backend
chenzhuofu Mar 5, 2025
ebe06cf
Merge commit 'aef8ad58196f7b7f724fc7f0a1a65af24ee12acd' of github.com…
chenzhuofu Mar 5, 2025
062825e
chore: Move realm tensor backing to dtgen
chenzhuofu Mar 5, 2025
d82fa2a
Merge commit '350babf3584c3d99e76e4dc0f72a658aa0222afc' of github.com…
chenzhuofu Mar 5, 2025
7c53bb3
chore: minor
chenzhuofu Mar 5, 2025
403ec78
Merge commit 'aef7c6e3c3087f15b4c90792148f170da84f6f7c' of github.com…
chenzhuofu Mar 5, 2025
bf57d1d
chore: remove deprecated file
chenzhuofu Mar 5, 2025
Build
reyna-abhyankar committed Feb 6, 2025
commit 8efaec7f2590bc4b8613c9f742910119d67df71a
5 changes: 1 addition & 4 deletions lib/kernels/include/kernels/array_shape.h
@@ -18,7 +18,7 @@ struct ArrayShape {
explicit ArrayShape(nonnegative_int *dims, nonnegative_int num_dims);
explicit ArrayShape(TensorShape const &shape);
explicit ArrayShape(std::vector<nonnegative_int> const &);
explicit ArrayShape(LegionTensorDims const &);
explicit ArrayShape(LegionOrdered<nonnegative_int> const &);

/**
* @brief Alias of ArrayShape::num_elements for compatibility with
@@ -53,9 +53,6 @@ struct ArrayShape {
ArrayShape sub_shape(std::optional<legion_dim_t> start,
std::optional<legion_dim_t> end) const;

bool operator==(ArrayShape const &) const;
bool operator!=(ArrayShape const &) const;

public:
LegionOrdered<nonnegative_int> dims;

2 changes: 2 additions & 0 deletions lib/kernels/include/kernels/legion_dim.h
@@ -10,6 +10,8 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value);

legion_dim_t legion_dim_from_ff_dim(ff_dim_t, nonnegative_int num_dimensions);

ff_dim_t ff_dim_from_legion_dim(legion_dim_t, nonnegative_int num_dimensions);

template <typename T>
using LegionOrdered = DimOrdered<legion_dim_t, T>;

1 change: 0 additions & 1 deletion lib/kernels/src/allocation.cc
@@ -13,7 +13,6 @@ void Allocator::deallocate(void *ptr) {

GenericTensorAccessorW
Allocator::allocate_tensor(TensorShape const &tensor_shape) {
return {tensor_shape.data_type, ArrayShape{tensor_shape}, ptr};
void *ptr =
this->allocate(get_size_in_bytes(tensor_shape).unwrap_nonnegative());
return {tensor_shape.data_type, ArrayShape{tensor_shape}, ptr};
38 changes: 15 additions & 23 deletions lib/kernels/src/array_shape.cc
@@ -22,7 +22,7 @@ ArrayShape::ArrayShape(TensorShape const &shape)
ArrayShape::ArrayShape(std::vector<nonnegative_int> const &input_dims)
: dims(input_dims) {}

ArrayShape::ArrayShape(LegionTensorDims const &legion_tensor_dims)
ArrayShape::ArrayShape(LegionOrdered<nonnegative_int> const &legion_tensor_dims)
: dims(legion_tensor_dims) {}

nonnegative_int ArrayShape::get_volume() const {
@@ -58,23 +58,23 @@ nonnegative_int ArrayShape::at(ff_dim_t idx) const {

ArrayShape ArrayShape::sub_shape(std::optional<ff_dim_t> start,
std::optional<ff_dim_t> end) const {
std::optional<legion_dim_t> legion_start =
return ArrayShape{legion_ordered_from_ff_ordered(slice(ff_ordered_from_legion_ordered(this->dims), start, end))};
}

ArrayShape ArrayShape::sub_shape(std::optional<legion_dim_t> start,
std::optional<legion_dim_t> end) const {
std::optional<ff_dim_t> legion_start =
transform(start, [&](auto const &start_unwrapped) {
return legion_dim_from_ff_dim(start_unwrapped, num_dims());
return ff_dim_from_legion_dim(start_unwrapped, num_dims());
});

std::optional<legion_dim_t> legion_end =
std::optional<ff_dim_t> legion_end =
transform(end, [&](auto const &end_unwrapped) {
return legion_dim_from_ff_dim(end_unwrapped, num_dims());
return ff_dim_from_legion_dim(end_unwrapped, num_dims());
});
return this->sub_shape(legion_start, legion_end);
}

ArrayShape ArrayShape::sub_shape(std::optional<legion_dim_t> start,
std::optional<legion_dim_t> end) const {
return ArrayShape{slice(this->dims, start, end)};
}

bool ArrayShape::operator==(ArrayShape const &other) const {
return this->tie() == other.tie();
}
@@ -83,11 +83,11 @@ bool ArrayShape::operator!=(ArrayShape const &other) const {
return this->tie() != other.tie();
}

ArrayShape ArrayShape::sub_shape(
std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const {
NOT_IMPLEMENTED();
}
// ArrayShape ArrayShape::sub_shape(
// std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
// std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const {
// NOT_IMPLEMENTED();
// }

std::optional<nonnegative_int> ArrayShape::at_maybe(legion_dim_t index) const {
if (index.value < dims.size()) {
@@ -114,14 +114,6 @@ TensorShape get_tensor_shape(ArrayShape const &shape, DataType dtype) {
dtype};
}

bool ArrayShape::operator==(ArrayShape const &other) const {
return this->dims == other.dims;
}

bool ArrayShape::operator!=(ArrayShape const &other) const {
return this->dims != other.dims;
}

std::string format_as(ArrayShape const &x) {
std::ostringstream oss;
oss << "<ArrayShape";
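Note (reviewer sketch): the ff-ordered sub_shape overload above now converts the dims to ff order, slices there, and converts back to legion order. A minimal standalone sketch of that scheme, using plain std::vector<int> and a local slice helper as stand-ins for the project's LegionOrdered/FFOrdered types and slice utility; that the two orderings are simple reversals of each other is an assumption made here for illustration only.

#include <algorithm>
#include <cassert>
#include <optional>
#include <vector>

// ff order and legion order are assumed to be reversals of each other.
std::vector<int> ff_from_legion(std::vector<int> dims) {
  std::reverse(dims.begin(), dims.end());
  return dims;
}
std::vector<int> legion_from_ff(std::vector<int> dims) {
  std::reverse(dims.begin(), dims.end());
  return dims;
}

// Half-open slice [start, end); std::nullopt means "from the beginning" / "to the end".
std::vector<int> slice(std::vector<int> const &v,
                       std::optional<int> start,
                       std::optional<int> end) {
  int s = start.value_or(0);
  int e = end.value_or(static_cast<int>(v.size()));
  return std::vector<int>(v.begin() + s, v.begin() + e);
}

int main() {
  std::vector<int> legion_dims = {2, 3, 4, 5};
  // Sub-shape over ff-ordered bounds: convert, slice in ff order, convert back.
  std::vector<int> sub =
      legion_from_ff(slice(ff_from_legion(legion_dims), 1, std::nullopt));
  assert((sub == std::vector<int>{2, 3, 4}));
}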
3 changes: 2 additions & 1 deletion lib/kernels/src/cuda/ops/concat_kernels.cu
@@ -16,6 +16,7 @@
#include "device.h"
#include "kernels/concat_kernels.h"
#include "kernels/legion_dim.h"
#include "utils/nonnegative_int/nonnegative_int.h"
#include <cassert>

namespace FlexFlow {
@@ -27,7 +28,7 @@ void calc_blk_size(size_t &num_blocks,
ArrayShape const &shape,
ff_dim_t axis) {
legion_dim_t axis_legion_dim = legion_dim_from_ff_dim(axis, shape.num_dims());
blk_size = shape.sub_shape(legion_dim_t{0}, axis_legion_dim).num_elements().unwrap_nonnegative();
blk_size = shape.sub_shape(legion_dim_t{nonnegative_int{0}}, axis_legion_dim).num_elements().unwrap_nonnegative();
num_blocks = shape.sub_shape(axis, std::nullopt).num_elements().unwrap_nonnegative();
}

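Note (reviewer sketch): the only change in this file wraps the bare literal in nonnegative_int. A brief illustration of why that wrapping becomes necessary, assuming legion_dim_t is a strong typedef over nonnegative_int with explicit constructors; the *_like types below are stand-ins, not the project's definitions.

// Stand-in strong typedefs: explicit constructors block implicit conversion from int.
struct nonnegative_int_like {
  explicit nonnegative_int_like(int v) : value(v) {}
  int value;
};

struct legion_dim_like {
  explicit legion_dim_like(nonnegative_int_like v) : value(v) {}
  nonnegative_int_like value;
};

int main() {
  // legion_dim_like d{0};                      // would not compile: no implicit conversion from int
  legion_dim_like d{nonnegative_int_like{0}};   // explicit wrapping, as in the hunk above
  return d.value.value;
}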
6 changes: 6 additions & 0 deletions lib/kernels/src/legion_dim.cc
@@ -13,4 +13,10 @@ legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim,
ff_dim.value.unwrap_nonnegative() - 1}};
}

ff_dim_t legion_dim_from_ff_dim(legion_dim_t legion_dim,
nonnegative_int num_dimensions) {
return ff_dim_t{nonnegative_int{num_dimensions.unwrap_nonnegative() -
legion_dim.value.unwrap_nonnegative() - 1}};
}

} // namespace FlexFlow
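Note (reviewer sketch): both conversions between ff and legion dimension indices reverse the index against the total dimension count. A minimal sketch of that round trip, with plain int standing in for ff_dim_t, legion_dim_t, and nonnegative_int.

#include <cassert>

// legion_dim = num_dims - ff_dim - 1, and the inverse has the same form.
int legion_from_ff(int ff_dim, int num_dims) {
  return num_dims - ff_dim - 1;
}

int ff_from_legion(int legion_dim, int num_dims) {
  return num_dims - legion_dim - 1;
}

int main() {
  int num_dims = 4;
  assert(legion_from_ff(0, num_dims) == 3); // outermost ff dim is the last legion dim
  for (int d = 0; d < num_dims; d++) {
    // The two conversions are inverses of each other.
    assert(ff_from_legion(legion_from_ff(d, num_dims), num_dims) == d);
  }
}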
28 changes: 1 addition & 27 deletions lib/local-execution/src/local-execution/ops/transpose.cc
@@ -28,24 +28,8 @@ enum Slots {
OUTPUT, // tensor
ATTRS,
PROFILING,
PER_DEVICE_STATE,
};

OpTaskInvocation init(TransposeAttrs const &attrs) {
OpTaskBinding binding;
binding.bind_arg(ATTRS, attrs);
return {task_id_t::TRANSPOSE_INIT_TASK_ID, binding};
}

static DeviceSpecificDeviceStates
init_task_impl(TaskArgumentAccessor const &acc) {
auto const &attrs = acc.get_argument<TransposeAttrs>(ATTRS);
std::vector<ff_dim_t> perm = inner_to_outer_idxs(attrs.perm);
TransposePerDeviceState per_device_state = init_kernel(perm.size(), perm);

return DeviceSpecificDeviceStates{
DeviceSpecific<TransposePerDeviceState>::create(per_device_state)};
}

OpTaskInvocation forward(TransposeAttrs const &attrs) {
OpTaskBinding binding;
@@ -95,9 +79,6 @@ OpTaskInvocation backward(TransposeAttrs const &attrs) {
return {task_id_t::TRANSPOSE_BWD_TASK_ID, binding};
}

TaskImplFunction get_transpose_init_task_impl() {
return TaskImplFunction{InitOpTaskImplFunction{init_task_impl}};
}

TaskImplFunction get_transpose_fwd_task_impl() {
return TaskImplFunction{FwdBwdOpTaskImplFunction{forward_task_impl}};
@@ -107,13 +88,6 @@ TaskImplFunction get_transpose_bwd_task_impl() {
return TaskImplFunction{FwdBwdOpTaskImplFunction{backward_task_impl}};
}

OpTaskSignature get_transpose_init_signature() {
OpTaskSignature init(OpTaskType::INIT);

init.add_arg_slot<TransposeAttrs>(ATTRS);
init.add_return_value<TransposePerDeviceState>();
return init;
}

OpTaskSignature get_transpose_fwd_signature() {
OpTaskSignature fwd(OpTaskType::FWD);
@@ -131,7 +105,7 @@ OpTaskSignature get_transpose_bwd_signature() {
}

std::vector<task_id_t> get_task_ids(TransposeAttrs const &) {
return {task_id_t::TRANSPOSE_INIT_TASK_ID, task_id_t::TRANSPOSE_FWD_TASK_ID, task_id_t::TRANSPOSE_BWD_TASK_ID};
return {task_id_t::TRANSPOSE_FWD_TASK_ID, task_id_t::TRANSPOSE_BWD_TASK_ID};
}

} // namespace FlexFlow
2 changes: 1 addition & 1 deletion lib/local-execution/src/local_cost_estimator.cc
@@ -19,7 +19,7 @@ namespace FlexFlow {
LocalCostEstimator::LocalCostEstimator(RuntimeArgConfig const &config)
: runtime_arg_config(config) {}

static ComputationGraph const &
static ComputationGraph
create_computation_graph_for_local_cost_estimation(
PCGOperatorAttrs const &op,
std::vector<ParallelTensorShape> const &inputs,
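Note (reviewer sketch): dropping the const & on the return type avoids handing back a reference to a graph that is local to the function. A standalone illustration of the distinction, with a stand-in Graph type rather than the project's ComputationGraph.

#include <string>

struct Graph {
  std::string name;
};

// What the old signature allowed (returning a reference to a local) dangles:
// Graph const &make_graph_bad() { Graph g{"local"}; return g; }  // undefined behavior

// What the new signature does: return by value, so the graph is moved/copied out.
Graph make_graph() {
  Graph g{"local"};
  return g;
}

int main() {
  Graph g = make_graph();
  return g.name.empty() ? 1 : 0;
}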
45 changes: 23 additions & 22 deletions lib/local-execution/src/loss_functions.cc
@@ -17,6 +17,7 @@
#include "kernels/loss_function_kernels.h"
#include "local-execution/loss_functions.h"
#include "local-execution/profiling.h"
#include "utils/nonnegative_int/nonnegative_int.h"

namespace FlexFlow {

@@ -54,52 +55,52 @@ static void backward_task_impl(TaskArgumentAccessor const &acc) {
auto logit_grad = acc.get_tensor_grad<Permissions::RW>(LOGIT_GRAD);
auto logit = acc.get_tensor<Permissions::RO>(LOGIT);
auto label = acc.get_loss_tensor<Permissions::RO>(LABEL);
int batch_size = logit.shape.at(legion_dim_t{1});
int batch_size = logit.shape.at(legion_dim_t{nonnegative_int{1}}).unwrap_nonnegative();
// assuming logit shape is [batch dim, num classes]

LossFunction loss_type = get_loss_function(attrs);
float scale_factor = 1.0f / batch_size;
if (loss_type == LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE) {
assert(logit.shape.get_volume() == label.shape.get_volume());
scale_factor = 2.0f / logit.shape.get_volume();
scale_factor = 2.0f / logit.shape.get_volume().unwrap_nonnegative();
}

if (loss_type == LossFunction::SPARSE_CATEGORICAL_CROSSENTROPY) {
// label shape is [batch dim, 1]
auto scce_attrs = attrs.get<SparseCategoricalCrossEntropyLossAttrs>();
size_t ndim = logit.shape.num_dims();
int num_classes = logit.shape.at(legion_dim_t{0});
size_t ndim = logit.shape.num_dims().unwrap_nonnegative();
int num_classes = logit.shape.at(legion_dim_t{nonnegative_int{0}}).unwrap_nonnegative();
assert(logit_grad.shape == logit.shape);
int k = 1;
if (scce_attrs.replace_labels) {
k = logit.shape.at(legion_dim_t(ndim - 1)) /
k = logit.shape.at(legion_dim_t(nonnegative_int{ndim - 1})).unwrap_nonnegative() /
label.shape.at(legion_dim_t(
ndim - 1)); // TODO FIXME something seems wrong here, isn't the
nonnegative_int{ndim - 1})).unwrap_nonnegative(); // TODO FIXME something seems wrong here, isn't the
// numerator guaranteed to be 1? <--- this is not the
// case because of the potential parallel dim
}
assert(label.shape.sub_shape(legion_dim_t(1), std::nullopt) ==
logit.shape.sub_shape(legion_dim_t(1), std::nullopt));
assert(k * label.shape.at(legion_dim_t(ndim - 1)) ==
logit.shape.at(legion_dim_t(ndim - 1)));
assert(label.shape.at(legion_dim_t(0)) == 1);
assert(label.shape.sub_shape(legion_dim_t(nonnegative_int{1}), std::nullopt) ==
logit.shape.sub_shape(legion_dim_t(nonnegative_int{1}), std::nullopt));
assert(k * label.shape.at(legion_dim_t(nonnegative_int{ndim - 1})).unwrap_nonnegative() ==
logit.shape.at(legion_dim_t(nonnegative_int{ndim - 1})).unwrap_nonnegative());
assert(label.shape.at(legion_dim_t(nonnegative_int{0})).unwrap_nonnegative() == 1);

profile(sparse_categorical_crossentropy_loss_backward_kernel,
profiling,
"[SparseCategoricalCrossEntropyLoss] backward_time = %.2lfms\n",
get_float_ptr(logit_grad),
get_float_ptr(logit),
reinterpret_cast<int const *>(get_float_ptr(label)),
get_volume(logit.shape),
get_volume(logit_grad.shape),
get_volume(logit.shape).unwrap_nonnegative(),
get_volume(logit_grad.shape).unwrap_nonnegative(),
batch_size,
num_classes,
k,
scale_factor);
} else {
assert(logit.shape == label.shape);
assert(logit_grad.shape == logit.shape);
int num_channels = logit.shape.at(legion_dim_t{0});
int num_channels = logit.shape.at(legion_dim_t{nonnegative_int{0}}).unwrap_nonnegative();
switch (loss_type) {
case LossFunction::CATEGORICAL_CROSSENTROPY: {
profile(categorical_crossentropy_loss_backward_kernel,
@@ -108,8 +109,8 @@ static void backward_task_impl(TaskArgumentAccessor const &acc) {
get_float_ptr(logit_grad),
get_float_ptr(logit),
get_float_ptr(label),
get_volume(logit.shape),
get_volume(logit_grad.shape),
get_volume(logit.shape).unwrap_nonnegative(),
get_volume(logit_grad.shape).unwrap_nonnegative(),
scale_factor);
break;
}
@@ -120,8 +121,8 @@ static void backward_task_impl(TaskArgumentAccessor const &acc) {
get_float_ptr(logit_grad),
get_float_ptr(logit),
get_float_ptr(label),
get_volume(logit.shape),
get_volume(logit_grad.shape),
get_volume(logit.shape).unwrap_nonnegative(),
get_volume(logit_grad.shape).unwrap_nonnegative(),
scale_factor);
break;
}
@@ -131,15 +132,15 @@ static void backward_task_impl(TaskArgumentAccessor const &acc) {
"[IdentityLoss] backward_time = %.2lfms\n",
get_float_ptr(logit_grad),
get_float_ptr(logit),
get_volume(logit.shape),
get_volume(logit_grad.shape),
get_volume(logit.shape).unwrap_nonnegative(),
get_volume(logit_grad.shape).unwrap_nonnegative(),
scale_factor);
break;
}
default:
throw mk_runtime_error(
throw mk_runtime_error(fmt::format(
"Unsupported loss function {}. Please report this as an issue.",
loss_type);
loss_type));
}
}
}
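Note (reviewer sketch): the hunks above keep the existing gradient scale-factor logic while unwrapping nonnegative_int values to plain ints. A standalone sketch of that selection logic; the enum and helper below are illustrative stand-ins, not the project's definitions.

#include <cassert>

enum class LossFunction {
  CATEGORICAL_CROSSENTROPY,
  SPARSE_CATEGORICAL_CROSSENTROPY,
  MEAN_SQUARED_ERROR_AVG_REDUCE,
  IDENTITY,
};

// 1/batch_size by default; 2/num_elements for mean-squared-error with average reduction.
float loss_grad_scale(LossFunction loss_type, int batch_size, int logit_volume) {
  float scale_factor = 1.0f / batch_size;
  if (loss_type == LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE) {
    scale_factor = 2.0f / logit_volume;
  }
  return scale_factor;
}

int main() {
  assert(loss_grad_scale(LossFunction::CATEGORICAL_CROSSENTROPY, 32, 320) == 1.0f / 32);
  assert(loss_grad_scale(LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE, 32, 320) == 2.0f / 320);
}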
16 changes: 8 additions & 8 deletions lib/local-execution/src/optimizer.cc
@@ -59,11 +59,11 @@ static void sgd_update_task_impl(TaskArgumentAccessor const &acc) {
auto profiling = acc.get_argument<ProfilingSettings>(PROFILING);

assert(weight.shape == weight_grad.shape);
size_t size = weight_grad.shape.get_volume();
int size = weight_grad.shape.get_volume().unwrap_nonnegative();

assert(weight_grad.shape.get_volume() & weight.shape.get_volume() == 0);
size_t num_replicas =
weight_grad.shape.get_volume() / weight.shape.get_volume();
assert(weight_grad.shape.get_volume().unwrap_nonnegative() & weight.shape.get_volume().unwrap_nonnegative() == 0);
int num_replicas =
weight_grad.shape.get_volume().unwrap_nonnegative() / weight.shape.get_volume().unwrap_nonnegative();

float *sgd_v_ptr;
if (attrs.momentum > 0.0f) {
@@ -153,11 +153,11 @@ static void adam_update_task_impl(TaskArgumentAccessor const &acc) {
auto profiling = acc.get_argument<ProfilingSettings>(PROFILING);

assert(weight.shape == weight_grad.shape);
size_t size = weight_grad.shape.get_volume();
int size = weight_grad.shape.get_volume().unwrap_nonnegative();

assert(weight_grad.shape.get_volume() % weight.shape.get_volume() == 0);
size_t num_replicas =
weight_grad.shape.get_volume() / weight.shape.get_volume();
assert(weight_grad.shape.get_volume().unwrap_nonnegative() % weight.shape.get_volume().unwrap_nonnegative() == 0);
int num_replicas =
weight_grad.shape.get_volume().unwrap_nonnegative() / weight.shape.get_volume().unwrap_nonnegative();

if (CHOSEN_SYNC_TYPE == ParamSync::NCCL) {
auto handle = acc.get_argument<PerDeviceFFHandle>(HANDLE);
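Note (reviewer sketch): both update tasks derive a replica count by dividing the gradient volume by the weight volume. A minimal sketch of that computation, assuming the divisibility check is the modulo form used in the Adam hunk (the SGD hunk above still uses a bitwise & in its assert).

#include <cassert>

// Replica count between a possibly replicated gradient and its weight.
int compute_num_replicas(int weight_grad_volume, int weight_volume) {
  assert(weight_grad_volume % weight_volume == 0);
  return weight_grad_volume / weight_volume;
}

int main() {
  // A 1024-element weight whose gradient is replicated 4 times.
  assert(compute_num_replicas(4 * 1024, 1024) == 4);
}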
6 changes: 3 additions & 3 deletions lib/local-execution/src/task_registry.cc
@@ -36,8 +36,8 @@ void register_tasks_for_layer(TaskRegistry &task_registry,
task_registry.backward_task_ids[op_id] = task_id;
break;
default:
throw mk_runtime_error("Invalid OpTaskType, got {}",
task_signature_impl.task_signature.type);
throw mk_runtime_error(fmt::format("Invalid OpTaskType, got {}",
task_signature_impl.task_signature.type));
}
task_registry.task_mapping.insert({task_id, task_signature_impl});
}
@@ -58,7 +58,7 @@ bool registry_contains_task_for_layer(TaskRegistry const &task_registry,
task_ids = task_registry.backward_task_ids;
break;
default:
throw mk_runtime_error("Invalid OpTaskType, got {}", op_task_type);
throw mk_runtime_error(fmt::format("Invalid OpTaskType, got {}", op_task_type));
}

return task_ids.at(op).has_value();
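Note (reviewer sketch): both hunks in this file switch to formatting the message with fmt::format before constructing the error. The same pattern in standalone form, with std::runtime_error standing in for the project's mk_runtime_error helper.

#include <fmt/format.h>
#include <stdexcept>

// Format first, then hand a single string to the error constructor.
std::runtime_error invalid_op_task_type_error(int op_task_type) {
  return std::runtime_error(
      fmt::format("Invalid OpTaskType, got {}", op_task_type));
}

int main() {
  try {
    throw invalid_op_task_type_error(42);
  } catch (std::runtime_error const &e) {
    fmt::print("{}\n", e.what()); // prints: Invalid OpTaskType, got 42
  }
}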