Merge master

flexflow · reyna-abhyankar · Aug 25, 2024 · Aug 27, 2024 · Aug 27, 2024 · Aug 27, 2024
commit 9047edce81c68daf31692e9c9d24be77787bfa56
diff --git a/bin/export-model-arch/src/export_model_arch.cc b/bin/export-model-arch/src/export_model_arch.cc
@@ -3,6 +3,7 @@
 #include "export_model_arch/json_sp_model_export.dtg.h"
 #include "models/bert/bert.h"
 #include "models/candle_uno/candle_uno.h"
+#include "models/dlrm/dlrm.h"
 #include "models/inception_v3/inception_v3.h"
 #include "models/split_test/split_test.h"
 #include "models/transformer/transformer.h"
@@ -69,6 +70,8 @@ tl::expected<ComputationGraph, std::string>
     return get_candle_uno_computation_graph(get_default_candle_uno_config());
   } else if (model_name == "bert") {
     return get_bert_computation_graph(get_default_bert_config());
+  } else if (model_name == "dlrm") {
+    return get_dlrm_computation_graph(get_default_dlrm_config());
   } else if (model_name == "split_test") {
     nonnegative_int batch_size = 8_n;
     return get_split_test_computation_graph(batch_size);
@@ -144,6 +147,7 @@ int main(int argc, char **argv) {
                                             "inception_v3",
                                             "candle_uno",
                                             "bert",
+                                            "dlrm",
                                             "split_test",
                                             "single_operator"};
   CLIArgumentKey key_model_name = cli_add_positional_argument(

diff --git a/cmake/doctestlib.cmake b/cmake/doctestlib.cmake
@@ -3,14 +3,14 @@ include(aliasing)
 if (FF_USE_EXTERNAL_DOCTEST)
   find_package(doctest REQUIRED)
   include(doctest) # import doctest_discover_tests
+
+  target_compile_definitions(
+    doctest::doctest
+    INTERFACE
+      DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES
+  )
+  alias_library(doctest doctest::doctest)
 else()
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest)
   include(${CMAKE_CURRENT_SOURCE_DIR}/deps/doctest/scripts/cmake/doctest.cmake)
 endif()
-
-target_compile_definitions(
-  doctest::doctest
-  INTERFACE
-    DOCTEST_CONFIG_REQUIRE_STRINGIFICATION_FOR_ALL_USED_TYPES
-)
-alias_library(doctest doctest::doctest)
diff --git a/lib/compiler/include/compiler/graph_optimize_result.h b/lib/compiler/include/compiler/graph_optimize_result.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_GRAPH_OPTIMIZE_RESULT_H
+#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_GRAPH_OPTIMIZE_RESULT_H
+
+#include "compiler/graph_optimize_result.dtg.h"
+
+namespace FlexFlow {
+
+// Returns a string rendering of a GraphOptimizeResult.
+std::string format_as(GraphOptimizeResult const &r);
+
+// Stream insertion for a GraphOptimizeResult.
+std::ostream &operator<<(std::ostream &s, GraphOptimizeResult const &r);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/compiler/include/compiler/graph_optimize_state.h b/lib/compiler/include/compiler/graph_optimize_state.h
@@ -6,8 +6,8 @@
 namespace FlexFlow {
 
 struct GraphOptimizeState {
-  GraphOptimizeState(GraphOptimizeResult const &graph_optimize_result,
-                     float runtime);
+  explicit GraphOptimizeState(GraphOptimizeResult const &graph_optimize_result,
+                              float runtime);
 
   GraphOptimizeResult graph_optimize_result;
   float runtime;
@@ -17,6 +17,9 @@ struct GraphOptimizeState {
   bool operator<(GraphOptimizeState const &other) const;
 };
 
+std::string format_as(GraphOptimizeState const &);
+std::ostream &operator<<(std::ostream &, GraphOptimizeState const &);
+
 } // namespace FlexFlow
 
 namespace std {

diff --git a/lib/compiler/src/compiler/graph_optimize_state.cc b/lib/compiler/src/compiler/graph_optimize_state.cc
@@ -1,4 +1,5 @@
 #include "compiler/graph_optimize_state.h"
+#include "compiler/graph_optimize_result.h"
 #include "pcg/parallel_computation_graph/parallel_tensor_guid_t.h"
 
 namespace FlexFlow {
@@ -54,6 +55,23 @@ bool GraphOptimizeState::operator<(GraphOptimizeState const &other) const {
   return runtime < other.runtime;
 }
 
+// Renders the state in the form
+//   <GraphOptimizeState graph_optimize_result=... runtime=...>
+std::string format_as(GraphOptimizeState const &st) {
+  GraphOptimizeResult const &result = st.graph_optimize_result;
+  float const runtime = st.runtime;
+
+  return fmt::format("<GraphOptimizeState graph_optimize_result={} runtime={}>",
+                     result,
+                     runtime);
+}
+
+// Writes the fmt rendering of the state to the stream and returns the stream.
+std::ostream &operator<<(std::ostream &s, GraphOptimizeState const &st) {
+  s << fmt::to_string(st);
+  return s;
+}
+
 } // namespace FlexFlow
 
 namespace std {

diff --git a/.../series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc b/.../series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc
@@ -15,7 +15,7 @@ std::string render_preprocessed_computation_graph_for_sp_decomposition(
     ComputationGraph const &cg) {
   std::unordered_set<layer_guid_t> weight_and_input_layers =
       filter(get_layers(cg), [&](layer_guid_t const &l) {
-        ComputationGraphOpAttrs op_attrs = get_layer_attrs(cg, l).attrs;
+        ComputationGraphOpAttrs op_attrs = get_layer_attrs(cg, l).op_attrs;
         return op_attrs.has<WeightAttrs>() || op_attrs.has<InputAttrs>();
       });
 
@@ -41,7 +41,7 @@ std::string render_preprocessed_computation_graph_for_sp_decomposition(
       return "FAKE";
     }
     LayerAttrs a = cg.raw_graph.at(n);
-    RecordFormatter r = as_dot(a.attrs);
+    RecordFormatter r = as_dot(a.op_attrs);
 
     if (a.name.has_value()) {
       RecordFormatter rr;
@@ -75,7 +75,7 @@ std::optional<SeriesParallelDecomposition>
   DiGraphView preprocessed_digraph = [&] {
     std::unordered_set<layer_guid_t> weight_and_input_layers =
         filter(get_layers(cg), [&](layer_guid_t const &l) {
-          ComputationGraphOpAttrs op_attrs = get_layer_attrs(cg, l).attrs;
+          ComputationGraphOpAttrs op_attrs = get_layer_attrs(cg, l).op_attrs;
           return op_attrs.has<WeightAttrs>() || op_attrs.has<InputAttrs>();
         });
 

diff --git a/lib/compiler/src/compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.cc b/lib/compiler/src/compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.cc
@@ -1,10 +1,93 @@
 #include "compiler/series_parallel/pcg/get_pcg_series_parallel_decomposition.h"
+#include "op-attrs/pcg_operator_attrs.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "utils/containers/get_only.h"
+#include "utils/graph/digraph/algorithms/materialize_digraph_view.h"
+#include "utils/graph/instances/adjacency_digraph.h"
+#include "utils/graph/series_parallel/get_series_parallel_decomposition.h"
+#include <cassert>
+#include <functional>
 
 namespace FlexFlow {
 
+// Computes a series-parallel decomposition for the graph underlying `pcg`.
+//
+// The raw graph is tried first and its decomposition is returned if one
+// exists. Otherwise, edges are added from the end of each weight/input
+// parallel-op chain to every node that get_subgraph_successors reports for
+// the set of chain ends, and the decomposition is attempted once more on
+// that augmented graph. The result of that second attempt is returned as-is.
 std::optional<SeriesParallelDecomposition>
-    get_pcg_series_parallel_decomposition(ParallelComputationGraph const &) {
-  NOT_IMPLEMENTED();
+    get_pcg_series_parallel_decomposition(ParallelComputationGraph const &pcg) {
+  {
+    DiGraphView unpreprocessed_digraph = pcg.raw_graph;
+    std::optional<SeriesParallelDecomposition> unpreprocessed_sp_decomposition =
+        get_series_parallel_decomposition(unpreprocessed_digraph);
+    if (unpreprocessed_sp_decomposition.has_value()) {
+      return unpreprocessed_sp_decomposition.value();
+    }
+  }
+
+  auto layer_is_weight_or_input = [&](parallel_layer_guid_t const &l) {
+    PCGOperatorAttrs op_attrs = get_parallel_layer_attrs(pcg, l).op_attrs;
+    return op_attrs.has<WeightAttrs>() || op_attrs.has<InputAttrs>();
+  };
+
+  auto layer_is_parallel_op = [&](parallel_layer_guid_t const &l) {
+    PCGOperatorAttrs op_attrs = get_parallel_layer_attrs(pcg, l).op_attrs;
+    return is_parallel_op(op_attrs);
+  };
+
+  // Starting from a weight/input layer (or a parallel op), walks forward
+  // while the current layer has exactly one successor and that successor is
+  // a parallel op, returning the last layer reached. std::function is used
+  // so that the lambda can call itself.
+  std::function<parallel_layer_guid_t(parallel_layer_guid_t const &)>
+      follow_to_last_parallel_op =
+          [&](parallel_layer_guid_t const &starting_point)
+      -> parallel_layer_guid_t {
+    assert(layer_is_weight_or_input(starting_point) ||
+           layer_is_parallel_op(starting_point));
+
+    std::unordered_set<parallel_layer_guid_t> successors =
+        get_successors(pcg, starting_point);
+
+    if (successors.size() != 1) {
+      return starting_point;
+    }
+
+    // Reuse the set computed above instead of querying the graph again.
+    parallel_layer_guid_t successor = get_only(successors);
+
+    assert(!layer_is_weight_or_input(successor));
+    if (layer_is_parallel_op(successor)) {
+      return follow_to_last_parallel_op(successor);
+    } else {
+      return starting_point;
+    }
+  };
+
+  DiGraphView preprocessed_digraph = [&] {
+    std::unordered_set<parallel_layer_guid_t> weight_and_input_layers =
+        filter(get_parallel_layers(pcg), layer_is_weight_or_input);
+
+    std::unordered_set<parallel_layer_guid_t> par_chain_endpoints =
+        transform(weight_and_input_layers, follow_to_last_parallel_op);
+
+    std::unordered_set<parallel_layer_guid_t> par_chain_endpoint_successors =
+        get_subgraph_successors(pcg, par_chain_endpoints);
+
+    DiGraph digraph = materialize_digraph_view<AdjacencyDiGraph>(pcg.raw_graph);
+    for (parallel_layer_guid_t const &src : par_chain_endpoints) {
+      for (parallel_layer_guid_t const &dst : par_chain_endpoint_successors) {
+        digraph.add_edge(DirectedEdge{src.raw_graph_node, dst.raw_graph_node});
+      }
+    }
+
+    return digraph;
+  }();
+
+  return get_series_parallel_decomposition(preprocessed_digraph);
 }
 
 } // namespace FlexFlow
diff --git a/lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc b/lib/compiler/src/compiler/task_graph_simulator/simulate_task_graph_execution.cc
@@ -29,11 +29,11 @@ TaskGraphExecutionTrace simulate_task_graph_execution(
         "simulate_task_graph_execution cannot simulate cyclic directed graphs");
   }
 
-  TaskGraphExecutionState execution_state =
-      TaskGraphExecutionState{/*ready_tasks=*/set_of(get_sources(task_graph)),
-                              /*in_progress_tasks=*/{},
-                              /*finished_tasks=*/{},
-                              /*current_time=*/0.0};
+  TaskGraphExecutionState execution_state = TaskGraphExecutionState{
+      /*ready_tasks=*/set_of(get_initial_nodes(task_graph)),
+      /*in_progress_tasks=*/{},
+      /*finished_tasks=*/{},
+      /*current_time=*/0.0};
 
   std::unordered_set<TaskProfile> task_profiles;
 

diff --git a/lib/compiler/test/src/compiler/graph_optimize_result.cc b/lib/compiler/test/src/compiler/graph_optimize_result.cc
@@ -0,0 +1,21 @@
+#include "compiler/graph_optimize_result.h"
+
+namespace FlexFlow {
+
+// Renders the result with the PCG (as produced by as_dot) and the machine
+// mapping, separated by newlines.
+std::string format_as(GraphOptimizeResult const &r) {
+  auto const &machine_mapping = r.machine_mapping;
+
+  return fmt::format("<GraphOptimizeResult\npcg={}\nmachine_mapping={}>",
+                     as_dot(r.pcg),
+                     machine_mapping);
+}
+
+// Writes the fmt rendering of the result to the stream and returns the stream.
+std::ostream &operator<<(std::ostream &s, GraphOptimizeResult const &r) {
+  s << fmt::to_string(r);
+  return s;
+}
+
+} // namespace FlexFlow