Loss and update tests

flexflow · reyna-abhyankar · Aug 25, 2024 · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024
commit f0a4285bf4262bc793f9e4e8f4aa4e2c51d048fd
diff --git a/lib/local-execution/include/local-execution/allocated_tensors.h b/lib/local-execution/include/local-execution/allocated_tensors.h
@@ -25,6 +25,8 @@ bool is_allocated_tensor_backing_valid(
     std::unordered_map<TensorTypeVariant, GenericTensorAccessorW> const &,
     ArrayShape const &);
 
+AllocatedTensors make_empty_allocated_tensors(); // no-argument factory; the definition brace-initializes all three fields with {}
+
 } // namespace FlexFlow
 
 #endif
diff --git a/lib/local-execution/src/allocated_tensors.cc b/lib/local-execution/src/allocated_tensors.cc
@@ -138,4 +138,8 @@ bool are_allocated_tensors_valid(
          are_allocated_optimizer_tensors_valid(allocated_tensors, tensor_attrs);
 }
 
+AllocatedTensors make_empty_allocated_tensors() { // no-argument factory, returns by value
+  return AllocatedTensors{{}, {}, {}}; // each of the three positional fields gets an empty {} initializer
+}
+
 } // namespace FlexFlow
diff --git a/lib/local-execution/test/CMakeLists.txt b/lib/local-execution/test/CMakeLists.txt
@@ -2,12 +2,7 @@ ff_add_test_executable(
   NAME
     local-execution-tests
   SRC_PATTERNS
-    src/test_allocated_tensors.cc
-    src/test_unallocated_tensors.cc
-    src/test_task_registry.cc
-    src/test_utils.cc
-    src/test_local_task_arg_accessor.cc
-    src/test_local_tensor_backing.cc
+    src/*.cc
   PRIVATE_INCLUDE 
     src/
   DEPS

diff --git a/lib/local-execution/test/src/test_local_cost_estimator.cc b/lib/local-execution/test/src/test_local_cost_estimator.cc
@@ -1,79 +1,81 @@
-// #include "doctest/doctest.h"
-// #include "kernels/local_cuda_allocator.h"
-// #include "kernels/managed_per_device_ff_handle.h"
-// #include "local-execution/local_cost_estimator.h"
-// #include "op-attrs/ops/attention.h"
-// #include "op-attrs/parallel_tensor_shape.h"
-// #include "pcg/computation_graph_builder.h"
-// #include "test_utils.h"
+#include "doctest/doctest.h"
+#include "kernels/local_cuda_allocator.h"
+#include "kernels/managed_per_device_ff_handle.h"
+#include "local-execution/local_cost_estimator.h"
+#include "op-attrs/ops/attention.h"
+#include "op-attrs/parallel_tensor_shape.h"
+#include "pcg/computation_graph_builder.h"
+#include "test_utils.h"
 
-// using namespace ::FlexFlow;
+using namespace ::FlexFlow;
 
-// TEST_SUITE(FF_CUDA_TEST_SUITE) {
-//   TEST_CASE("Local Cost Estimator") {
-//     // local backing initialization
-//     ManagedPerDeviceFFHandle managed_handle{};
+TEST_SUITE(FF_CUDA_TEST_SUITE) {
+  TEST_CASE("Local Cost Estimator") {
+    // local backing initialization
+    ManagedPerDeviceFFHandle managed_handle{};
 
-//     RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{
-//         DeviceSpecific<PerDeviceFFHandle>::create(managed_handle.raw_handle()),
-//         EnableProfiling::YES,
-//         ProfilingSettings{/*warmup_iters=*/0,
-//                           /*measure_iters=*/1}};
+    RuntimeArgConfig runtime_arg_config = RuntimeArgConfig{
+        DeviceSpecific<PerDeviceFFHandle>::create(managed_handle.raw_handle()),
+        EnableProfiling::YES,
+        ProfilingSettings{/*warmup_iters=*/0,
+                          /*measure_iters=*/1}};
 
-//     LocalCostEstimator cost_estimator =
-//     LocalCostEstimator{runtime_arg_config};
+    LocalCostEstimator cost_estimator = LocalCostEstimator{runtime_arg_config};
 
-//     SUBCASE("Estimate cost -- Attention Op") {
-//       int embed_dim = 32;
-//       int num_heads = 10;
-//       MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{
-//           /*embed_dim=*/embed_dim,
-//           /*num_heads=*/num_heads,
-//           /*kdim=*/embed_dim,
-//           /*vdim=*/embed_dim,
-//           /*dropout=*/0.0,
-//           /*bias=*/true,
-//           /*add_bias_kv=*/false,
-//           /*add_zero_attn=*/false,
-//       };
+    SUBCASE("Estimate cost -- Attention Op") {
+      nonnegative_int embed_dim = 32_n;
+      nonnegative_int num_heads = 10_n;
+      MultiHeadAttentionAttrs attrs = MultiHeadAttentionAttrs{
+          /*embed_dim=*/embed_dim,
+          /*num_heads=*/num_heads,
+          /*kdim=*/embed_dim,
+          /*vdim=*/embed_dim,
+          /*dropout=*/0.0,
+          /*bias=*/true,
+          /*add_bias_kv=*/false,
+          /*add_zero_attn=*/false,
+      };
 
-//       size_t batch_size = 40;
-//       size_t seq_len = 48;
-//       size_t feature_size = 36;
+      nonnegative_int batch_size = 40_n;
+      nonnegative_int seq_len = 48_n;
+      nonnegative_int feature_size = 36_n;
 
-//       DataType dtype = DataType::FLOAT;
-//       ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{
-//           TensorDims{FFOrdered<size_t>{batch_size, seq_len, feature_size}},
-//           DataType::FLOAT,
-//       });
+      DataType dtype = DataType::FLOAT; // single source of truth for the input element type
+      ParallelTensorShape inputs_shape = lift_to_parallel(TensorShape{
+          TensorDims{
+              FFOrdered<nonnegative_int>{batch_size, seq_len, feature_size}},
+          dtype,
+      });
 
-//       ParallelTensorShape weights_shape = throw_if_unexpected(
-//           get_weights_shape(attrs, inputs_shape, inputs_shape,
-//           inputs_shape));
-//       ParallelTensorAttrs weight_attrs =
-//           ParallelTensorAttrs{weights_shape,
-//                               /*sync_type=*/std::nullopt,
-//                               /*initializer=*/std::nullopt,
-//                               CreateGrad::YES};
+      ParallelTensorShape weights_shape = throw_if_unexpected(
+          get_weights_shape(attrs, inputs_shape, inputs_shape, inputs_shape));
+      ParallelTensorAttrs weight_attrs =
+          ParallelTensorAttrs{weights_shape,
+                              /*sync_type=*/std::nullopt,
+                              /*initializer=*/std::nullopt,
+                              CreateGrad::YES};
 
-//       ParallelTensorShape output_shape = throw_if_unexpected(
-//           get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape));
-//       ParallelTensorAttrs output_attrs =
-//           ParallelTensorAttrs{output_shape,
-//                               /*sync_type=*/std::nullopt,
-//                               /*initializer=*/std::nullopt,
-//                               CreateGrad::YES};
+      ParallelTensorShape output_shape = throw_if_unexpected(
+          get_output_shape(attrs, inputs_shape, inputs_shape, inputs_shape));
+      ParallelTensorAttrs output_attrs =
+          ParallelTensorAttrs{output_shape,
+                              /*sync_type=*/std::nullopt,
+                              /*initializer=*/std::nullopt,
+                              CreateGrad::YES};
 
-//       CostDetails result = cost_estimator.estimate_cost(
-//           PCGOperatorAttrs{attrs},
-//           std::vector<ParallelTensorShape>{
-//               inputs_shape, inputs_shape, inputs_shape},
-//           std::vector<ParallelTensorAttrs>{weight_attrs},
-//           std::vector<ParallelTensorAttrs>{output_attrs},
-//           make_1d_machine_view(gpu_id_t{0}, gpu_id_t{1}));
+      CostDetails result = cost_estimator.estimate_cost(
+          PCGOperatorAttrs{attrs},
+          std::vector<ParallelTensorShape>{
+              inputs_shape, inputs_shape, inputs_shape},
+          std::vector<ParallelTensorAttrs>{weight_attrs},
+          std::vector<ParallelTensorAttrs>{output_attrs},
+          make_1d_machine_view(
+              MachineSpaceCoordinate{0_n, 0_n, DeviceType::GPU},
+              MachineSpecificationDimension::INTRA_NODE,
+              stride_t{0_n}));
 
-//       CHECK(result.total_elapsed_time > 0);
-//       CHECK(result.total_mem_usage > 0);
-//     }
-//   }
-// }
+      CHECK(result.total_elapsed_time > 0); // smoke check: only positivity is asserted, not a specific value
+      CHECK(result.total_mem_usage > 0); // likewise, only that some memory usage was reported
+    }
+  }
+}
diff --git a/lib/local-execution/test/src/test_loss_e2e.cc b/lib/local-execution/test/src/test_loss_e2e.cc