PCG serialization, rapidcheck, dtgen, and shape inference (#1394)
* Add initial lib/substitution-generator and bin/substitutions-to-dot

* Format

* Update proj version and add .proj.toml file to repo directly

* Revert changes to flake.nix

* Prototype implementation of dtgen

* Refactor op-attrs to use dtgen

* Format

* More dtgen'ing

* Re-pass tests

* Simplify types in substitutions, more dtgen

* Add new reduction dim shape inference for conv2d

* Move conv2d input parsing into public headers

* Remove incorrect not_implemented

* Add pcg tests

* Add initial test for pcg

* Partial implementation of shape inference for linear

* Fix rapidcheck (#8)

* enable rapidcheck tests for op-attrs

* added rc::checks

* fix merge

* fixed variant toml

* revert proj.toml

* removed an extra import introduced by the merge

* added a constraint for ff_dim

* lock flake

---------

Co-authored-by: Rae Wong <raewong@sapling2.stanford.edu>

* Attempt to hide dtgen-generated files from github diff

* Fix header file name for dtgen in gitattributes

* Update proj and format code

* Add initial shape inference for BMM

* Add half of shape inference for Attention

* Finish initial shape inference for Attention

* Enable op-attrs and pcg tests in CI

* Add parallel shape inference for add and relu

* Add parallel shape inference for embedding

* Add shape inference for repartition, combine, replicate, and reduction

* Include tests for reduction

* Address wmdi comments

* Fixup linear shape inference, add tests for linear

* Fix tests

* Format

* Fix build errors

* change lcov in ci to rm dtgen coverage

* Remove dtgen from coverage

* Temporarily disable substitutions build in CI

* Format

* Fix substitution-generator build and tests

* Format

* attempt to fix ci coverage

* second attempt

* small fix

* small fix

---------

Co-authored-by: Rae Wong <33883582+yingyee0111@users.noreply.github.com>
Co-authored-by: Rae Wong <raewong@sapling2.stanford.edu>
Co-authored-by: Qinghan Chen <qinghanc@andrew.cmu.edu>
4 people authored Jun 4, 2024
1 parent c97f63f commit f93e262
Showing 731 changed files with 35,397 additions and 5,231 deletions.
22 changes: 22 additions & 0 deletions .editorconfig
@@ -0,0 +1,22 @@
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+[{CMakeLists.txt,*.cmake}]
+indent_style = space
+indent_size = 2
+
+[*.{cc,h,cu,cpp}]
+indent_style = space
+indent_size = 2
+
+[*.py]
+indent_style = space
+indent_size = 4
+
+[*.toml]
+indent_style = space
+indent_size = 2
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
+*.dtg.cc linguist-generated=true
+*.dtg.h linguist-generated=true
37 changes: 24 additions & 13 deletions .github/workflows/per-lib-check.yml
@@ -72,13 +72,13 @@ jobs:
         run: |
           build_libs.sh kernels
-      - name: Build substitutions
-        run: |
-          build_libs.sh substitutions
+      # - name: Build substitutions
+      #   run: |
+      #     build_libs.sh substitutions
 
-      - name: Build compiler
-        run: |
-          build_libs.sh compiler
+      # - name: Build compiler
+      #   run: |
+      #     build_libs.sh compiler
 
       - name: Build substitution-generator
         run: |
@@ -88,30 +88,41 @@
         run: |
           test_libs.sh utils
-      - name: Test substitutions
+      - name: Test op-attrs
         run: |
-          test_libs.sh substitutions
+          test_libs.sh op-attrs
-      - name: Test compiler
+      - name: Test pcg
         run: |
-          test_libs.sh compiler
+          test_libs.sh pcg
+      # - name: Test substitutions
+      #   run: |
+      #     test_libs.sh substitutions
+
+      # - name: Test compiler
+      #   run: |
+      #     test_libs.sh compiler
+
       - name: Test substitution-generator
         run: |
           test_libs.sh substitution-generator
       - name: Generate code coverage
         run: |
           echo "gitwork: $GITHUB_WORKSPACE"
           lcov --capture --directory . --output-file main_coverage.info
-          lcov --remove main_coverage.info '/nix/store/' --output-file main_coverage.info
-          lcov --list main_coverage.info
+          lcov --extract main_coverage.info "$GITHUB_WORKSPACE/lib/*" --output-file main_coverage_e.info
+          lcov --remove main_coverage_e.info "$GITHUB_WORKSPACE/lib/*.dtg.h" "$GITHUB_WORKSPACE/lib/*.dtg.cc" --output-file main_coverage_e_f.info
+          lcov --list main_coverage_e_f.info
       - name: Upload code coverage
         uses: codecov/codecov-action@v4
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
-          files: main_coverage.info
+          file: main_coverage_e_f.info
           flags: unittests
+          plugin: pycoverage #hope this will disable gcov
           name: codecov-umbrella
           fail_ci_if_error: false
           verbose: true
13 changes: 9 additions & 4 deletions .proj.toml
@@ -7,13 +7,18 @@ build_targets = [
   "utils",
   "op-attrs",
   "kernels",
-  "substitutions",
-  "compiler",
+  "pcg",
+  # "substitutions",
+  # "compiler",
   "substitution-generator",
 ]
 test_targets = [
   "utils-tests",
-  "substitutions-tests",
-  "compiler-tests",
+  "op-attrs-tests",
+  "pcg-tests",
+  # "substitutions-tests",
+  # "compiler-tests",
   "substitution-generator-tests",
 ]
 
 [cmake_flags_extra]
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions flake.nix
@@ -112,6 +112,10 @@
       inputsFrom = [ ci ];
       inherit (ci) CMAKE_FLAGS;
 
+      VIMPLUGINS = lib.strings.concatStringsSep "," [
+        "${proj-repo.packages.${system}.proj-nvim}"
+      ];
+
       buildInputs = builtins.concatLists [
         (with pkgs; [
           clang-tools
7 changes: 3 additions & 4 deletions lib/compiler/test/src/test_optimal_cost.cc
@@ -41,10 +41,9 @@ TEST_SUITE(FF_TEST_SUITE) {
 
     MultiDiEdge e{n1, pcg.add_node_port(), n0, pcg.add_node_port()};
     pcg.add_edge(e);
-    pcg.add_output(e,
-                   ParallelTensor(ParallelTensorDims({2, 1}),
-                                  DataType::FLOAT,
-                                  CreateGrad::YES));
+    ParallelDim dim = {2, 1, false};
+    ParallelTensorDims dims = {FFOrdered<ParallelDim>{dim}};
+    pcg.add_output(e, ParallelTensor(dims, DataType::FLOAT, CreateGrad::YES));
 
     auto test_allowed_machine_views = [](Operator const &,
                                          MachineSpecification const &) {
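A hedged reading of the new literal, for orientation: the positional fields of ParallelDim are an assumption about its dtgen spec (plausibly size, degree, and is_replica_dim) and are not shown in this diff.

// Hypothetical annotated form of the line above; field names assumed.
ParallelDim dim = {/*size=*/2, /*degree=*/1, /*is_replica_dim=*/false};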
7 changes: 5 additions & 2 deletions lib/kernels/include/kernels/array_shape.h
@@ -6,6 +6,7 @@
 #include "utils/stack_vector.h"
 #include "utils/visitable.h"
 #include <cstddef>
+#include <optional>
 #include <vector>
 
 namespace FlexFlow {
@@ -41,8 +42,10 @@ struct ArrayShape {
   std::optional<std::size_t> at_maybe(std::size_t) const;
 
   ArrayShape reversed_dim_order() const;
-  ArrayShape sub_shape(std::optional<legion_dim_t> start,
-                       std::optional<legion_dim_t> end) const;
+
+  ArrayShape
+      sub_shape(std::optional<std::variant<ff_dim_t, legion_dim_t>> start,
+                std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const;
 
 public:
   LegionTensorDims dims;
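A minimal usage sketch of the widened sub_shape signature (not from this commit): it assumes std::nullopt means "from the first dimension" / "through the last dimension", and that ff_dim_t and legion_dim_t convert implicitly into the optional<variant> parameters.

// Hypothetical call sites; `shape` is some existing ArrayShape.
ArrayShape head = shape.sub_shape(std::nullopt, ff_dim_t{2});     // bound given in ff order
ArrayShape tail = shape.sub_shape(legion_dim_t{1}, std::nullopt); // bound given in legion order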
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/cast_kernels.h
@@ -4,7 +4,7 @@
 #include "device.h"
 #include "kernels/accessor.h"
 #include "kernels/ff_handle.h"
-#include "op-attrs/activation.h"
+#include "op-attrs/activation.dtg.h"
 
 namespace FlexFlow {
 namespace Kernels {
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/conv_2d_kernels.h
@@ -4,7 +4,7 @@
 #include "device.h"
 #include "kernels/accessor.h"
 #include "kernels/ff_handle.h"
-#include "op-attrs/activation.h"
+#include "op-attrs/activation.dtg.h"
 #include "utils/visitable.h"
 
 namespace FlexFlow {
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/element_binary_kernels.h
@@ -5,7 +5,7 @@
 #include "ff_handle.h"
 #include "kernels/array_shape.h"
 #include "op-attrs/datatype.h"
-#include "op-attrs/op.h"
+#include "op-attrs/operator_type.h"
 
 namespace FlexFlow {
 
22 changes: 19 additions & 3 deletions lib/kernels/include/kernels/element_unary_kernels.h
@@ -24,18 +24,34 @@ namespace ElementUnary {
 
 ElementUnaryPerDeviceState init_kernel(ArrayShape const &input_shape,
                                        ArrayShape const &output_shape,
-                                       ElementUnaryUnifiedAttrs const &attrs);
+                                       ElementUnaryAttrs const &attrs);
 
 void forward_kernel(ffStream_t stream,
                     ElementUnaryPerDeviceState const &device_state,
-                    ElementUnaryUnifiedAttrs const &attrs,
+                    ElementUnaryAttrs const &attrs,
                     PerDeviceFFHandle &handle,
                     GenericTensorAccessorR const &input,
                     GenericTensorAccessorW const &output);
 
+void forward_kernel(ffStream_t stream,
+                    ElementUnaryPerDeviceState const &device_state,
+                    ElementScalarUnaryAttrs const &attrs,
+                    PerDeviceFFHandle &handle,
+                    GenericTensorAccessorR const &input,
+                    GenericTensorAccessorW const &output);
+
+void backward_kernel(ffStream_t stream,
+                     ElementUnaryPerDeviceState const &device_state,
+                     ElementUnaryAttrs const &attrs,
+                     PerDeviceFFHandle &handle,
+                     GenericTensorAccessorR const &input,
+                     GenericTensorAccessorW const &input_grad,
+                     GenericTensorAccessorR const &output,
+                     GenericTensorAccessorR const &output_grad);
+
 void backward_kernel(ffStream_t stream,
                      ElementUnaryPerDeviceState const &device_state,
-                     ElementUnaryUnifiedAttrs const &attrs,
+                     ElementScalarUnaryAttrs const &attrs,
                      PerDeviceFFHandle &handle,
                      GenericTensorAccessorR const &input,
                      GenericTensorAccessorW const &input_grad,
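With the old unified attrs type split in two, a caller holding either attrs value can dispatch to the matching overload. A sketch, assuming the attrs arrive as a std::variant (the variant alias and helper name here are hypothetical, not part of this commit):

#include <variant>

// Hypothetical dispatch helper; std::visit forwards to whichever
// forward_kernel overload matches the held alternative.
using AnyElementUnaryAttrs = std::variant<ElementUnaryAttrs, ElementScalarUnaryAttrs>;

void forward_kernel_dispatch(ffStream_t stream,
                             ElementUnaryPerDeviceState const &state,
                             AnyElementUnaryAttrs const &attrs,
                             PerDeviceFFHandle &handle,
                             GenericTensorAccessorR const &input,
                             GenericTensorAccessorW const &output) {
  std::visit(
      [&](auto const &a) {
        forward_kernel(stream, state, a, handle, input, output);
      },
      attrs);
}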
8 changes: 4 additions & 4 deletions lib/kernels/include/kernels/legion_dim.h
@@ -1,14 +1,14 @@
 #ifndef _FLEXFLOW_KERNELS_INCLUDE_KERNELS_LEGION_DIM_H
 #define _FLEXFLOW_KERNELS_INCLUDE_KERNELS_LEGION_DIM_H
 
+#include "kernels/legion_dim_t.dtg.h"
 #include "op-attrs/dim_ordered.h"
-#include "utils/strong_typedef.h"
 
 namespace FlexFlow {
 
-struct legion_dim_t : strong_typedef<legion_dim_t, int> {
-  using strong_typedef::strong_typedef;
-};
+legion_dim_t add_to_legion_dim(legion_dim_t, int);
+
+legion_dim_t legion_dim_from_ff_dim(ff_dim_t, int num_dimensions);
 
 template <typename T>
 using LegionOrdered = DimOrdered<legion_dim_t, T>;
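The header now only declares the conversions; a minimal sketch of plausible definitions, assuming the usual FlexFlow convention that legion dims are ordered in reverse of ff_dim order and that both typedefs expose an int value field. The actual definitions live in a .cc file not shown in this diff.

// Sketch only; the real implementations are not part of this hunk.
legion_dim_t add_to_legion_dim(legion_dim_t dim, int value) {
  return legion_dim_t{dim.value + value};
}

legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, int num_dimensions) {
  // Assumed reversal convention between ff and legion dimension order.
  return legion_dim_t{num_dimensions - ff_dim.value - 1};
}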
54 changes: 54 additions & 0 deletions lib/kernels/include/kernels/legion_dim_t.dtg.h

Some generated files are not rendered by default.

14 changes: 14 additions & 0 deletions lib/kernels/include/kernels/legion_dim_t.struct.toml
@@ -0,0 +1,14 @@
+namespace = "FlexFlow"
+name = "legion_dim_t"
+
+features = [
+  "eq",
+  "ord",
+  "hash",
+  "json",
+  "fmt",
+]
+
+[[fields]]
+name = "value"
+type = "int"
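Since the generated header is collapsed above, here is a hedged sketch of roughly what dtgen plausibly emits for this spec; the exact generated code is an assumption, not shown in the diff.

// Hypothetical approximation of legion_dim_t.dtg.h ("json" serializers
// omitted); the real file is collapsed via the .gitattributes entry above.
#include <functional>
#include <string>

namespace FlexFlow {

struct legion_dim_t {
  legion_dim_t() = delete;
  explicit legion_dim_t(int const &value); // single "value" field from the spec

  bool operator==(legion_dim_t const &) const; // "eq"
  bool operator!=(legion_dim_t const &) const;
  bool operator<(legion_dim_t const &) const;  // "ord"

  int value;
};

std::string format_as(legion_dim_t const &);   // "fmt"

} // namespace FlexFlow

namespace std {
template <>
struct hash<FlexFlow::legion_dim_t> {          // "hash"
  size_t operator()(FlexFlow::legion_dim_t const &) const;
};
} // namespace std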
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/pool_2d_kernels.h
@@ -3,7 +3,7 @@
 
 #include "device.h"
 #include "kernels/ff_handle.h"
-#include "op-attrs/activation.h"
+#include "op-attrs/activation.dtg.h"
 #include "op-attrs/ops/pool_2d.h"
 #include "utils/visitable.h"
 
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/reduce_kernels.h
@@ -4,7 +4,7 @@
 #include "array_shape.h"
 #include "device.h"
 #include "ff_handle.h"
-#include "op-attrs/op.h"
+#include "op-attrs/operator_type.dtg.h"
 
 namespace FlexFlow {
 
2 changes: 1 addition & 1 deletion lib/kernels/include/kernels/transpose_kernels.h
@@ -8,7 +8,7 @@ namespace FlexFlow {
 
 struct TransposePerDeviceState {
   int num_dim;
-  req<std::vector<int>> perm;
+  req<std::vector<legion_dim_t>> perm;
 };
 
 FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(TransposePerDeviceState,
11 changes: 2 additions & 9 deletions lib/kernels/src/cuda/ops/concat_kernels.cu
@@ -25,15 +25,8 @@ void calc_blk_size(size_t &num_blocks,
                    size_t &blk_size,
                    ArrayShape const &shape,
                    ff_dim_t axis) {
-  num_blocks = 1;
-  blk_size = 1;
-  for (int d = 0; d < shape.num_dims(); d++) {
-    if (d <= axis) {
-      blk_size *= shape[legion_dim_t(d)];
-    } else {
-      num_blocks *= shape[legion_dim_t(d)];
-    }
-  }
+  blk_size = shape.sub_shape(legion_dim_t{0}, axis).num_elements();
+  num_blocks = shape.sub_shape(axis, std::nullopt).num_elements();
 }
 
 void forward_kernel(cudaStream_t stream,
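For reference, the replaced loop partitioned the shape's dimensions into a per-block size and a block count. A self-contained sketch of that same split (a hypothetical std::vector-based helper, assuming the sub_shape bounds above reproduce the old partition):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Dimensions up to and including `axis` multiply into the block size;
// the remaining dimensions multiply into the number of blocks, so
// blk_size * num_blocks equals the total element count.
void calc_blk_size_sketch(std::size_t &num_blocks,
                          std::size_t &blk_size,
                          std::vector<std::size_t> const &dims,
                          int axis) {
  blk_size = std::accumulate(dims.begin(), dims.begin() + axis + 1,
                             std::size_t{1}, std::multiplies<>{});
  num_blocks = std::accumulate(dims.begin() + axis + 1, dims.end(),
                               std::size_t{1}, std::multiplies<>{});
}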