Add DLRM Model Computational Graph (#1532)
* For safety...
* Update
* Add cast support in ComputationGraphBuilder
* Minor bugfixes for DLRM computation graph builder support
* Format
* Remove completed todo
* Update based on review comments
* Address review comments
* Add dlrm dot graph
* PR comments
* Post-merge fixes
* Format
* Add more info to dot output
* Format

---------

Co-authored-by: Colin Unger <lockshaw@lockshaw.net>
1 parent 2db671e, commit e9a1af7
Showing 21 changed files with 556 additions and 24 deletions.
@@ -0,0 +1,58 @@
/**
 * @file dlrm.h
 *
 * @brief DLRM model
 *
 * @details The DLRM implementation refers to the examples at
 * https://github.com/flexflow/FlexFlow/blob/78307b0e8beb5d41ee003be8b5db168c2b3ef4e2/examples/cpp/DLRM/dlrm.cc
 * and
 * https://github.com/pytorch/torchrec/blob/7e7819e284398d7dc420e3bf149107ad310fa861/torchrec/models/dlrm.py#L440.
 */

#ifndef _FLEXFLOW_LIB_MODELS_INCLUDE_MODELS_DLRM_H
#define _FLEXFLOW_LIB_MODELS_INCLUDE_MODELS_DLRM_H

#include "models/dlrm/dlrm_arch_interaction_op.dtg.h"
#include "models/dlrm/dlrm_config.dtg.h"
#include "pcg/computation_graph_builder.h"

namespace FlexFlow {

// Helper functions to construct the DLRM model

/**
 * @brief Get the default DLRM config.
 *
 * @details The configs here refer to the example at
 * https://github.com/flexflow/FlexFlow/blob/78307b0e8beb5d41ee003be8b5db168c2b3ef4e2/examples/cpp/DLRM/dlrm.cc.
 */
DLRMConfig get_default_dlrm_config();

tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb,
                              DLRMConfig const &config,
                              tensor_guid_t const &input,
                              std::vector<nonnegative_int> const &mlp_layers);

tensor_guid_t create_dlrm_sparse_embedding_network(ComputationGraphBuilder &cgb,
                                                   DLRMConfig const &config,
                                                   tensor_guid_t const &input,
                                                   nonnegative_int input_dim,
                                                   nonnegative_int output_dim);

tensor_guid_t create_dlrm_interact_features(
    ComputationGraphBuilder &cgb,
    DLRMConfig const &config,
    tensor_guid_t const &bottom_mlp_output,
    std::vector<tensor_guid_t> const &emb_outputs);

/**
 * @brief Get the DLRM computation graph.
 *
 * @param config The config of the DLRM model.
 * @return ComputationGraph The computation graph of a DLRM model.
 */
ComputationGraph get_dlrm_computation_graph(DLRMConfig const &config);

} // namespace FlexFlow

#endif
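For orientation, a minimal usage sketch of this header (not part of the diff; it mirrors the unit test added later in this commit):

#include "models/dlrm/dlrm.h"
#include "pcg/computation_graph.h"

using namespace ::FlexFlow;

int main() {
  // Build the DLRM computation graph from the default config.
  DLRMConfig config = get_default_dlrm_config();
  ComputationGraph cg = get_dlrm_computation_graph(config);

  // The graph can then be inspected, e.g. by counting layers as the test does.
  return get_layers(cg).size() > 0 ? 0 : 1;
}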
lib/models/include/models/dlrm/dlrm_arch_interaction_op.enum.toml: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
namespace = "FlexFlow"
name = "DLRMArchInteractionOp"
features = [
  "hash",
  "json",
  "rapidcheck",
  "fmt",
]

[[values]]
name = "DOT"

[[values]]
name = "CAT"
@@ -0,0 +1,55 @@
namespace = "FlexFlow"
name = "DLRMConfig"

features = [
  "eq",
  "ord",
  "hash",
  "json",
  "rapidcheck",
  "fmt",
]

includes = [
  "<vector>",
  "<string>",
  "models/dlrm/dlrm_arch_interaction_op.dtg.h",
  "utils/nonnegative_int/nonnegative_int.h",
]

src_includes = [
  "utils/fmt/vector.h",
  "utils/hash/vector.h",
]

[[fields]]
name = "embedding_dim"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "embedding_bag_size"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "embedding_size"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "dense_arch_layer_sizes"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "over_arch_layer_sizes"
type = "std::vector<::FlexFlow::nonnegative_int>"

[[fields]]
name = "arch_interaction_op"
type = "::FlexFlow::DLRMArchInteractionOp"

[[fields]]
name = "batch_size"
type = "::FlexFlow::nonnegative_int"

[[fields]]
name = "seed"
type = "int"
@@ -0,0 +1,173 @@
#include "models/dlrm/dlrm.h"
#include "pcg/computation_graph.h"
#include "utils/containers/concat_vectors.h"
#include "utils/containers/repeat.h"
#include "utils/containers/transform.h"
#include "utils/containers/zip.h"
#include "utils/nonnegative_int/num_elements.h"
#include <cmath>   // sqrt, used for the initializer standard deviations
#include <cstdlib> // std::rand, used for the default seed

namespace FlexFlow {

DLRMConfig get_default_dlrm_config() {
  return DLRMConfig{
      /*embedding_dim=*/64_n,
      /*embedding_bag_size=*/1_n,
      /*embedding_size=*/
      std::vector<nonnegative_int>{
          1000000_n,
          1000000_n,
          1000000_n,
          1000000_n,
      },
      /*dense_arch_layer_sizes=*/
      std::vector<nonnegative_int>{
          4_n,
          64_n,
          64_n,
      },
      /*over_arch_layer_sizes=*/
      std::vector<nonnegative_int>{
          64_n,
          64_n,
          2_n,
      },
      /*arch_interaction_op=*/DLRMArchInteractionOp::CAT,
      /*batch_size=*/64_n,
      /*seed=*/std::rand(),
  };
}

tensor_guid_t create_dlrm_mlp(ComputationGraphBuilder &cgb,
                              DLRMConfig const &config,
                              tensor_guid_t const &input,
                              std::vector<nonnegative_int> const &mlp_layers) {
  tensor_guid_t t = input;

  // Refer to
  // https://github.com/facebookresearch/dlrm/blob/64063a359596c72a29c670b4fcc9450bb342e764/dlrm_s_pytorch.py#L218-L228
  // for example initializer.
  for (size_t i = 0; i < mlp_layers.size() - 1; i++) {
    float std_dev = sqrt(2.0f / (mlp_layers.at(i + 1) + mlp_layers.at(i)));
    InitializerAttrs projection_initializer =
        InitializerAttrs{NormInitializerAttrs{
            /*seed=*/config.seed,
            /*mean=*/0,
            /*stddev=*/std_dev,
        }};

    std_dev = sqrt(2.0f / mlp_layers.at(i + 1));
    InitializerAttrs bias_initializer = InitializerAttrs{NormInitializerAttrs{
        /*seed=*/config.seed,
        /*mean=*/0,
        /*stddev=*/std_dev,
    }};

    t = cgb.dense(/*input=*/t,
                  /*outDim=*/mlp_layers.at(i + 1),
                  /*activation=*/Activation::RELU,
                  /*use_bias=*/true,
                  /*data_type=*/DataType::FLOAT,
                  /*projection_initializer=*/projection_initializer,
                  /*bias_initializer=*/bias_initializer);
  }
  return t;
}

tensor_guid_t create_dlrm_sparse_embedding_network(ComputationGraphBuilder &cgb,
                                                   DLRMConfig const &config,
                                                   tensor_guid_t const &input,
                                                   nonnegative_int input_dim,
                                                   nonnegative_int output_dim) {
  float range = sqrt(1.0f / input_dim);
  InitializerAttrs embed_initializer = InitializerAttrs{UniformInitializerAttrs{
      /*seed=*/config.seed,
      /*min_val=*/-range,
      /*max_val=*/range,
  }};

  tensor_guid_t t = cgb.embedding(input,
                                  /*num_entries=*/input_dim,
                                  /*outDim=*/output_dim,
                                  /*aggr=*/AggregateOp::SUM,
                                  /*dtype=*/DataType::HALF,
                                  /*kernel_initializer=*/embed_initializer);
  // The embedding is computed in half precision; cast it back to float so it
  // can be concatenated with the float bottom-MLP output downstream.
  return cgb.cast(t, DataType::FLOAT);
}

tensor_guid_t create_dlrm_interact_features(
    ComputationGraphBuilder &cgb,
    DLRMConfig const &config,
    tensor_guid_t const &bottom_mlp_output,
    std::vector<tensor_guid_t> const &emb_outputs) {
  if (config.arch_interaction_op != DLRMArchInteractionOp::CAT) {
    throw mk_runtime_error(fmt::format(
        "Currently only arch_interaction_op=DLRMArchInteractionOp::CAT is "
        "supported, but found arch_interaction_op={}. If you need support for "
        "additional arch_interaction_op values, please create an issue.",
        format_as(config.arch_interaction_op)));
  }

  return cgb.concat(
      /*tensors=*/concat_vectors({bottom_mlp_output}, emb_outputs),
      /*axis=*/relative_ff_dim_t{1});
}

ComputationGraph get_dlrm_computation_graph(DLRMConfig const &config) {
  ComputationGraphBuilder cgb;

  auto create_input_tensor = [&](FFOrdered<nonnegative_int> const &dims,
                                 DataType const &data_type) -> tensor_guid_t {
    TensorShape input_shape = TensorShape{
        TensorDims{dims},
        data_type,
    };
    return cgb.create_input(input_shape, CreateGrad::YES);
  };

  // Create input tensors. With the default config this is four INT64 sparse
  // inputs of shape (batch_size=64, embedding_bag_size=1), one per embedding
  // table, plus one FLOAT dense input of shape
  // (batch_size=64, dense_arch_layer_sizes.front()=4).
  std::vector<tensor_guid_t> sparse_inputs =
      repeat(num_elements(config.embedding_size), [&]() {
        return create_input_tensor(
            {config.batch_size, config.embedding_bag_size}, DataType::INT64);
      });

  tensor_guid_t dense_input = create_input_tensor(
      {config.batch_size, config.dense_arch_layer_sizes.front()},
      DataType::FLOAT);

  // Construct the model
  tensor_guid_t bottom_mlp_output = create_dlrm_mlp(
      /*cgb=*/cgb,
      /*config=*/config,
      /*input=*/dense_input,
      /*mlp_layers=*/config.dense_arch_layer_sizes);

  std::vector<tensor_guid_t> emb_outputs = transform(
      zip(config.embedding_size, sparse_inputs),
      [&](std::pair<nonnegative_int, tensor_guid_t> const &combined_pair)
          -> tensor_guid_t {
        return create_dlrm_sparse_embedding_network(
            /*cgb=*/cgb,
            /*config=*/config,
            /*input=*/combined_pair.second,
            /*input_dim=*/combined_pair.first,
            /*output_dim=*/config.embedding_dim);
      });

  tensor_guid_t interacted_features = create_dlrm_interact_features(
      /*cgb=*/cgb,
      /*config=*/config,
      /*bottom_mlp_output=*/bottom_mlp_output,
      /*emb_outputs=*/emb_outputs);

  tensor_guid_t output = create_dlrm_mlp(
      /*cgb=*/cgb,
      /*config=*/config,
      /*input=*/interacted_features,
      /*mlp_layers=*/config.over_arch_layer_sizes);

  return cgb.computation_graph;
}

} // namespace FlexFlow
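As a quick sanity check on the initializer formula in create_dlrm_mlp above, here is a tiny standalone sketch (not part of this commit) that prints the standard deviations used for the default dense arch layer sizes {4, 64, 64}:

// Illustration only: reproduces the std-dev arithmetic from create_dlrm_mlp.
#include <cmath>
#include <cstdio>

int main() {
  int layers[] = {4, 64, 64};
  for (int i = 0; i < 2; i++) {
    // Projection weights: sqrt(2 / (fan_out + fan_in)).
    float proj_std = std::sqrt(2.0f / (layers[i + 1] + layers[i]));
    // Bias: sqrt(2 / fan_out).
    float bias_std = std::sqrt(2.0f / layers[i + 1]);
    // i = 0: proj_std = sqrt(2/68) ~= 0.171, bias_std = sqrt(2/64) ~= 0.177
    // i = 1: proj_std = sqrt(2/128) = 0.125, bias_std ~= 0.177
    std::printf("layer %d: proj_std=%.3f bias_std=%.3f\n", i, proj_std, bias_std);
  }
  return 0;
}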
@@ -0,0 +1,19 @@
#include "models/dlrm/dlrm.h"
#include "pcg/computation_graph.h"
#include <doctest/doctest.h>

using namespace ::FlexFlow;

TEST_SUITE(FF_TEST_SUITE) {
  TEST_CASE("get_dlrm_computation_graph") {
    DLRMConfig config = get_default_dlrm_config();

    ComputationGraph result = get_dlrm_computation_graph(config);

    SUBCASE("num layers") {
      int result_num_layers = get_layers(result).size();
      int correct_num_layers = 30;
      CHECK(result_num_layers == correct_num_layers);
    }
  }
}