Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Top-level loop for compiler #1576

Open
wants to merge 33 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
0d639ac
initial implementation of memory algorithm
wmdi Oct 13, 2024
da857a5
fmt
wmdi Oct 16, 2024
ef8c5c2
pass existing tests
wmdi Oct 16, 2024
982f1f5
initialize memory algorithm
wmdi Oct 31, 2024
01c6a6b
Merge remote-tracking branch 'flexflow/repo-refactor' into memory-alg
wmdi Oct 31, 2024
964c885
fix tests & format
wmdi Oct 31, 2024
0c0e7b0
minimum tests for memory algorithm
wmdi Nov 7, 2024
7778377
renaming
wmdi Dec 18, 2024
0315160
fmt
wmdi Dec 18, 2024
855a7d5
fix
wmdi Dec 30, 2024
2b4e127
rename single machine mapping
wmdi Jan 9, 2025
f72fb6f
Merge branch 'master' into memory-alg
lockshaw Jan 9, 2025
50bae93
format
wmdi Jan 9, 2025
3297d3f
Merge branch 'memory-alg' of github.com:wmdi/FlexFlow into memory-alg
wmdi Jan 9, 2025
d96b678
top-level loop for compiler
wmdi Jan 15, 2025
cd9b031
Merge branch 'master' into memory-alg
lockshaw Jan 15, 2025
1dcaa42
Merge branch 'master' into memory-alg
lockshaw Jan 20, 2025
c16bcf6
fixes
wmdi Jan 21, 2025
2e93e74
Merge branch 'memory-alg' of github.com:wmdi/FlexFlow into memory-alg
wmdi Jan 21, 2025
62389ad
upd
wmdi Jan 22, 2025
6d2fe50
fixes
wmdi Jan 29, 2025
45a931c
fix
wmdi Jan 30, 2025
efc7a9a
Merge remote-tracking branch 'flexflow/master' into memory-alg
wmdi Jan 30, 2025
4f97602
Merge remote-tracking branch 'flexflow/master' into memory-alg
wmdi Feb 12, 2025
14234b4
fix some errors introduced in merge
wmdi Feb 12, 2025
30e51fc
upd
wmdi Feb 20, 2025
ddbace1
Merge remote-tracking branch 'origin/master' into memory-alg
wmdi Feb 25, 2025
eb58e91
add test case for get mm problem tree
wmdi Feb 25, 2025
40c3494
Fix is_valid_machine_mapping_problem_tree, add hacky printing for pro…
lockshaw Feb 26, 2025
962934d
upd
wmdi Feb 27, 2025
550127a
update (#3)
Marsella8 Feb 28, 2025
948d247
fix get_optimal_machine_mapping
wmdi Mar 2, 2025
612bff5
implement divisible_by constraint type in substitutions
wmdi Mar 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix some errors introduced in merge
  • Loading branch information
wmdi committed Feb 12, 2025
commit 14234b49bb88d62802dfcf68f58291860a843308
4 changes: 2 additions & 2 deletions lib/compiler/src/compiler/compiler.cc
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
#include "compiler/compiler.h"
#include "compiler/unity_algorithm/unity_algorithm.h"
#include "pcg/pcg_from_computation_graph.h"
#include "utils/overload.h"

namespace FlexFlow {

SearchResult optimize(ComputationGraph const &computation_graph,

Check warning on line 8 in lib/compiler/src/compiler/compiler.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/compiler.cc#L8

Added line #L8 was not covered by tests
MachineSpecification const &machine_specification,
CostEstimator const &cost_estimator,
AlgorithmConfig const &search_config) {
return search_config.visit<SearchResult>(overload{
[&](DataParallelismConfig const &config) -> SearchResult {
throw std::runtime_error(
"Data parallel search algorithm is not implemented yet");

Check warning on line 15 in lib/compiler/src/compiler/compiler.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/compiler.cc#L12-L15

Added lines #L12 - L15 were not covered by tests
},
[&](UnitySearchConfig const &config) {

Check warning on line 17 in lib/compiler/src/compiler/compiler.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/compiler.cc#L17

Added line #L17 was not covered by tests
ParallelComputationGraph pcg =
parallel_computation_graph_from_computation_graph(
computation_graph);
pcg_from_computation_graph(computation_graph);
std::vector<Substitution> substitutions; // TODO: Implement this

Check warning on line 20 in lib/compiler/src/compiler/compiler.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/compiler.cc#L19-L20

Added lines #L19 - L20 were not covered by tests
return graph_optimize(
pcg, cost_estimator, machine_specification, substitutions, config);
},
});

Check warning on line 24 in lib/compiler/src/compiler/compiler.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/compiler.cc#L22-L24

Added lines #L22 - L24 were not covered by tests
}

} // namespace FlexFlow
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "compiler/unity_algorithm/graph_optimize_state.h"
#include "pcg/machine_specification.dtg.h"
#include "pcg/operator_task_space.h"
#include "substitutions/apply_substitution/apply_substitution.h"
#include "substitutions/pcg_pattern.h"
#include "substitutions/sub_parallel_computation_graph.h"
#include "substitutions/substitution.h"
Expand All @@ -26,113 +27,113 @@
* Applies a substitution to all possible positions in PCG
*/
std::vector<ParallelComputationGraph>
all_pcgs_obtained_by_applying_a_substitution(

Check warning on line 30 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L30

Added line #L30 was not covered by tests
ParallelComputationGraph const &pcg,
std::vector<Substitution> const &substitutions) {
std::vector<ParallelComputationGraph> results;
SubParallelComputationGraph subpcg = sub_pcg_from_full_pcg(pcg);
for (Substitution const &substitution : substitutions) {
for (PCGPatternMatch const &pattern_match :
find_pattern_matches(substitution.pcg_pattern, subpcg)) {

Check warning on line 37 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L33-L37

Added lines #L33 - L37 were not covered by tests
SubParallelComputationGraph subpcg_from_substitution =
apply_substitution(subpcg, substitution, pattern_match);
results.push_back(
pcg_from_sub_pcg_by_dropping_inputs(subpcg_from_substitution));
}

Check warning on line 42 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L39-L42

Added lines #L39 - L42 were not covered by tests
}
return results;
}

Check warning on line 45 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L44-L45

Added lines #L44 - L45 were not covered by tests

SearchResult graph_optimize(ParallelComputationGraph &pcg,

Check warning on line 47 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L47

Added line #L47 was not covered by tests
CostEstimator const &cost_estimator,
MachineSpecification const &resources,
std::vector<Substitution> const &substitutions,
UnitySearchConfig const &search_config) {

MachineMappingCache cached_subgraph_costs = empty_machine_mapping_cache();
DeduplicatedPriorityQueue<GraphOptimizeState> candidates;

Check warning on line 54 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L53-L54

Added lines #L53 - L54 were not covered by tests

MachineMappingContext context = MachineMappingContext{
/*cost_estimator=*/cost_estimator,
/*allowed_machine_views=*/
[&](UnmappedOpCostEstimateKey const &key,

Check warning on line 59 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L59

Added line #L59 was not covered by tests
MachineSpecification const &resources)
-> std::unordered_set<MachineView> {
return get_allowed_machine_views(
resources, key.op_task_space, DeviceType::GPU);

Check warning on line 63 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L63

Added line #L63 was not covered by tests
},
};

Check warning on line 65 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L65

Added line #L65 was not covered by tests

auto optimize_pcg = [&](ParallelComputationGraph const &pcg)

Check warning on line 67 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L67

Added line #L67 was not covered by tests
-> std::pair<GraphOptimizeState, std::optional<MachineMapping>> {
PCGBinarySPDecomposition sp_decomp =
expect(get_pcg_balanced_binary_sp_decomposition(pcg),
"Failed to get SP decomposition of PCG");

Check warning on line 71 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L70-L71

Added lines #L70 - L71 were not covered by tests

MachineMappingProblemTree problem_tree =
get_machine_mapping_problem_tree(pcg, sp_decomp);

Check warning on line 74 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L74

Added line #L74 was not covered by tests
MachineMappingConstraints constraints =
get_unconstrained_solution_for_layers(get_all_leaf_paths(problem_tree));

Check warning on line 76 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L76

Added line #L76 was not covered by tests

MachineMappingResult mm_result = get_optimal_machine_mapping(
cached_subgraph_costs,
context,
get_machine_mapping_problem_tree(pcg, sp_decomp),
resources,
constraints);

Check warning on line 83 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L79-L83

Added lines #L79 - L83 were not covered by tests

return {
GraphOptimizeState{

Check warning on line 86 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L86

Added line #L86 was not covered by tests
/*pcg=*/pcg,
/*runtime_with_optimal_mm=*/get_runtime_cost(mm_result),
},
get_machine_mapping_from_machine_mapping_result(sp_decomp, mm_result),
};
};

Check warning on line 92 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L90-L92

Added lines #L90 - L92 were not covered by tests

GraphOptimizeState best_state = optimize_pcg(pcg).first;
candidates.push(best_state);

Check warning on line 95 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L94-L95

Added lines #L94 - L95 were not covered by tests

for (int iteration = 0;
!candidates.empty() && iteration < search_config.budget;

Check warning on line 98 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L97-L98

Added lines #L97 - L98 were not covered by tests
++iteration) {
GraphOptimizeState current_state = candidates.top();
candidates.pop();

Check warning on line 101 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L100-L101

Added lines #L100 - L101 were not covered by tests

if (current_state < best_state) {
best_state = current_state;
} else if (current_state.runtime_with_optimal_mm >
best_state.runtime_with_optimal_mm * search_config.alpha) {
continue;

Check warning on line 107 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L103-L107

Added lines #L103 - L107 were not covered by tests
}

for (ParallelComputationGraph const &new_pcg :

Check warning on line 110 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L110

Added line #L110 was not covered by tests
all_pcgs_obtained_by_applying_a_substitution(current_state.pcg,
substitutions)) {

Check warning on line 112 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L112

Added line #L112 was not covered by tests
std::optional<GraphOptimizeState> new_pcg_optimize_result =
optimize_pcg(new_pcg).first;
if (new_pcg_optimize_result == std::nullopt) {
continue;

Check warning on line 116 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L114-L116

Added lines #L114 - L116 were not covered by tests
}
GraphOptimizeState new_state = new_pcg_optimize_result.value();
if (new_state.runtime_with_optimal_mm <= search_config.threshold &&
get_nodes(new_pcg.raw_graph).size() <= search_config.max_num_ops) {
candidates.push(new_state);

Check warning on line 121 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L118-L121

Added lines #L118 - L121 were not covered by tests
}
}
}

Check warning on line 124 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L123-L124

Added lines #L123 - L124 were not covered by tests

std::optional<MachineMapping> best_mapping =
optimize_pcg(best_state.pcg).second;

Check warning on line 127 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L127

Added line #L127 was not covered by tests

if (best_mapping == std::nullopt) {
throw std::runtime_error("Failed to find any solutions");

Check warning on line 130 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L129-L130

Added lines #L129 - L130 were not covered by tests
}

return SearchResult{
/*pcg=*/best_state.pcg,
/*machine_mapping=*/best_mapping.value(),
};
}

Check warning on line 137 in lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc

View check run for this annotation

Codecov / codecov/patch

lib/compiler/src/compiler/unity_algorithm/unity_algorithm.cc#L135-L137

Added lines #L135 - L137 were not covered by tests

} // namespace FlexFlow
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ TEST_SUITE(FF_TEST_SUITE) {
PCGOperatorAttrs input_attrs = PCGOperatorAttrs{InputAttrs{}};

auto make_operator_task_space = [&](ParallelTensorShape const &shape) {
std::vector<int> degrees;
std::vector<nonnegative_int> degrees;
extend(degrees, vector_of(ff_ordered_shard_degrees(shape)));
degrees.push_back(get_sum_degree(shape));
degrees.push_back(get_discard_copy_degree(shape));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@ TEST_SUITE(FF_TEST_SUITE) {

ParallelTensorShape input_shape = ParallelTensorShape{
ParallelTensorDims{
FFOrdered<ShardParallelDim>{
ShardParallelDim{10, 1},
},
FFOrdered<ShardParallelDim>{ShardParallelDim{
nonnegative_int{10},
nonnegative_int{1},
}},
ReplicaParallelDimSet{
SumDegree{1},
DiscardCopyDegree{1},
SumDegree{nonnegative_int{1}},
DiscardCopyDegree{nonnegative_int{1}},
},
},
DataType::FLOAT,
Expand Down
74 changes: 18 additions & 56 deletions lib/compiler/test/src/compiler/unity_algorithm/unity_algorithm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,81 +7,43 @@
#include "op-attrs/shard_parallel_dim.h"
#include "pcg/computation_graph_builder.h"
#include "pcg/parallel_computation_graph/parallel_computation_graph_builder.h"
#include "pcg/pcg_from_computation_graph.h"
#include "utils/integer_conversions.h"

using namespace FlexFlow;

TEST_SUITE(FF_TEST_SUITE) {
TEST_CASE("graph_optimize") {
// TODO: recover this by implementing
// parallel_computation_graph_from_computation_graph ComputationGraph cg =
// [&] {
// ComputationGraphBuilder b;
// TensorShape input_tensor_shape = TensorShape{
// TensorDims{
// FFOrdered<size_t> {32, 64},
// },
// DataType::FLOAT,
// };
// tensor_guid_t t = b.create_input(input_tensor_shape, CreateGrad::YES);
// t = b.dense(t,
// /*outDim=*/16,
// /*activation=*/std::nullopt);
// t = b.gelu(t);
// t = b.dense(t,
// /*outDim=*/12,
// /*activation=*/std::nullopt,
// /*use_bias=*/false,
// /*data_type=*/DataType::FLOAT,
// /*kernel_initializer=*/std::nullopt,
// /*bias_initializer=*/std::nullopt);
// t = b.relu(t);
// t = b.dense(t,
// /*outDim=*/8,
// /*activation=*/Activation::RELU);
// return b.computation_graph;
// }();

// ParallelComputationGraph pcg =
// parallel_computation_graph_from_computation_graph(cg);

ParallelComputationGraph pcg = [&] {
ParallelComputationGraphBuilder b;
int in_channels = 24;
int batch_size = 4;
int batch_degree = 2;
parallel_tensor_guid_t t = b.create_input_tensor(ParallelTensorShape{
ParallelTensorDims{
FFOrdered<ShardParallelDim>{
ShardParallelDim{size_t_from_int(batch_size), batch_degree},
ShardParallelDim{size_t_from_int(in_channels), 1},
},
ReplicaParallelDimSet{
SumDegree{1},
DiscardCopyDegree{1},
},
ComputationGraph cg = [&] {
ComputationGraphBuilder b;
TensorShape input_tensor_shape = TensorShape{
TensorDims{
FFOrdered<nonnegative_int>{nonnegative_int{32},
nonnegative_int{64}},
},
DataType::FLOAT,
});
};
tensor_guid_t t = b.create_input(input_tensor_shape, CreateGrad::YES);
t = b.dense(t,
/*outDim=*/16,
/*outDim=*/nonnegative_int{16},
/*activation=*/std::nullopt);
t = b.gelu(t);
t = b.dense(t,
/*outDim=*/12,
/*outDim=*/nonnegative_int{12},
/*activation=*/std::nullopt,
/*use_bias=*/false,
/*data_type=*/DataType::FLOAT,
/*kernel_initializer=*/std::nullopt,
/*bias_initializer=*/std::nullopt);
t = b.relu(t);
t = b.dense(t,
/*outDim=*/8,
/*outDim=*/nonnegative_int{8},
/*activation=*/Activation::RELU);

return b.pcg;
return b.computation_graph;
}();

ParallelComputationGraph pcg = pcg_from_computation_graph(cg);

CostEstimator cost_estimator = make_fake_cost_estimator(
[](OpCostEstimateKey const &k) {
return OpCostMetrics{
Expand All @@ -93,9 +55,9 @@ TEST_SUITE(FF_TEST_SUITE) {
[](TensorSetMovement const &) { return 1.0; });

MachineSpecification full_machine_spec = MachineSpecification{
/*num_nodes=*/2,
/*num_cpus_per_node=*/1,
/*num_gpus_per_node=*/1,
/*num_nodes=*/nonnegative_int{2},
/*num_cpus_per_node=*/nonnegative_int{1},
/*num_gpus_per_node=*/nonnegative_int{1},
/*inter_node_bandwidth=*/1,
/*intra_node_bandwidth=*/1,
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,6 @@ ParallelComputationGraph without_layer_names(ParallelComputationGraph const &);
bool pcgs_are_isomorphic(ParallelComputationGraph const &,
ParallelComputationGraph const &);

ParallelComputationGraph
parallel_computation_graph_from_computation_graph(ComputationGraph const &);

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,4 @@ bool pcgs_are_isomorphic(ParallelComputationGraph const &lhs,
.has_value();
}

/**
 * Placeholder for converting a ComputationGraph into a
 * ParallelComputationGraph.
 *
 * The ComputationGraph parameter is unnamed and unused: the body consists
 * solely of the NOT_IMPLEMENTED() macro, so no conversion is performed here.
 * NOTE(review): presumably NOT_IMPLEMENTED() aborts or throws at runtime --
 * confirm against the macro's definition.
 */
ParallelComputationGraph parallel_computation_graph_from_computation_graph(
ComputationGraph const &) {
NOT_IMPLEMENTED();
}

} // namespace FlexFlow
Loading