Basic PR on Cost Model #35774
Changes from all commits
paddle/fluid/framework/ir/cost_model.cc (new file) @@ -0,0 +1,256 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/cost_model.h"

#include <algorithm>
#include <cctype>
#include <memory>

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace framework {

using ir::Graph;
using platform::Event;
using platform::MemEvent;

const double CostData::NOT_MEASURED = -1;

CostData::~CostData() {
  // TODO(zhhsplendid): when we save a copy of program/graph, we should delete
  // here.
}

double CostData::GetOpTimeMs(int op_id) const { return op_time_ms_.at(op_id); }
double CostData::GetOpMemoryBytes(int op_id) const {
  return op_memory_bytes_.at(op_id);
}
double CostData::GetWholeTimeMs() const { return whole_time_ms_; }
double CostData::GetWholeMemoryBytes() const { return whole_memory_bytes_; }

const Graph* CostData::GetGraph() const { return graph_; }
const ProgramDesc* CostData::GetProgram() const { return program_; }

bool CostData::SetCostData(const ProgramDesc& program,
                           const std::vector<std::vector<Event>>& time_events) {
  // TODO(zhhsplendid): make a copy so that CostData stays valid even if the
  // caller changes the Program; the copy can be saved into the pointer
  // program_
  if (program.Size() == 0) {
    whole_time_ms_ = 0;
    whole_memory_bytes_ = 0;
    return true;
  }

  if (time_events.empty()) {
    LOG(WARNING) << "Input time_events for CostModel is empty";
    return false;
  }

  std::vector<Event> main_thread_events = time_events[0];
  // Support global block only
  // TODO(zhhsplendid): support sub blocks
  const BlockDesc& global_block = program.Block(0);
  size_t op_size = global_block.OpSize();
  if (op_size == 0) {
    whole_time_ms_ = 0;
    whole_memory_bytes_ = 0;
    return true;
  }

  bool event_to_cost_success = true;
  size_t event_index = 0;
  for (size_t i = 0; i < op_size; ++i) {
    const OpDesc* op_desc = global_block.Op(i);
    std::string op_type = op_desc->Type();

    while (event_index < main_thread_events.size()) {
      if (main_thread_events[event_index].name() == op_type &&
          main_thread_events[event_index].type() ==
              platform::EventType::kPushRange) {
        break;
      }
      ++event_index;
    }
    if (event_index >= main_thread_events.size()) {
      LOG(WARNING) << "Input time_events for Op " << i << ", type '" << op_type
                   << "' has the wrong format; skipping this Op.";
      event_to_cost_success = false;
      continue;
    }
    size_t op_push_index = event_index;

    while (event_index < main_thread_events.size()) {
      // Is it possible to push many Ops with the same type and then pop?
      // A ControlFlow Op can behave like that, but this version only supports
      // the global block
      // TODO(zhhsplendid): make a more strict mapping between push and pop
      if (main_thread_events[event_index].name() == op_type &&
          main_thread_events[event_index].type() ==
              platform::EventType::kPopRange) {
        break;
      }
      ++event_index;
    }
    if (event_index >= main_thread_events.size()) {
      LOG(WARNING) << "Input time_events for Op " << i << ", type '" << op_type
                   << "' has the wrong format; skipping this Op.";
      event_to_cost_success = false;
      continue;
    }
    size_t op_pop_index = event_index;
    double cpu_time_ms = main_thread_events[op_push_index].CpuElapsedMs(
        main_thread_events[op_pop_index]);
    double gpu_time_ms = 0;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    gpu_time_ms = main_thread_events[op_push_index].CudaElapsedMs(
        main_thread_events[op_pop_index]);
#endif
    double time_ms = gpu_time_ms + cpu_time_ms;
    op_time_ms_[i] = time_ms;
  }

  event_index = 0;
  int start_profiler_idx = -1;
  int stop_profiler_idx = -1;
  while (event_index < main_thread_events.size()) {
    if (main_thread_events[event_index].name() == "_start_profiler_") {
      start_profiler_idx = event_index;
    } else if (main_thread_events[event_index].name() == "_stop_profiler_") {
      stop_profiler_idx = event_index;
      break;
    }
    ++event_index;
  }
  if (start_profiler_idx != -1 && stop_profiler_idx != -1) {
    double cpu_time_ms = main_thread_events[start_profiler_idx].CpuElapsedMs(
        main_thread_events[stop_profiler_idx]);

[Review comment on the CpuElapsedMs call above: Not good now, because we are not sure about the profiler.]
    double gpu_time_ms = 0;
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    gpu_time_ms = main_thread_events[start_profiler_idx].CudaElapsedMs(
        main_thread_events[stop_profiler_idx]);
#endif
    whole_time_ms_ = gpu_time_ms + cpu_time_ms;
  } else {
    LOG(WARNING) << "Input time_events for whole-program time have the wrong "
                    "format";
    event_to_cost_success = false;
  }

  return event_to_cost_success;
}
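
// Editorial note (not in the PR): for a two-op program, the main-thread event
// stream consumed by SetCostData above is expected to look roughly like
//   _start_profiler_, push(op0), pop(op0), push(op1), pop(op1),
//   _stop_profiler_
// and each op's cost is the CPU (plus, on CUDA/HIP builds, GPU) elapsed time
// between its push and pop events. A standalone sketch of this matching loop
// follows the end of this file.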

void PrintEvents(const std::vector<std::vector<Event>>* time_events,
                 const std::vector<std::vector<MemEvent>>* mem_events) {
  if (time_events != nullptr) {
    for (size_t i = 0; i < time_events->size(); ++i) {
      for (size_t j = 0; j < (*time_events)[i].size(); ++j) {
        VLOG(4) << "Print time event (" << i << ", " << j << ")" << std::endl;
        VLOG(4) << (*time_events)[i][j].name() << " "
                << (*time_events)[i][j].attr() << std::endl;
        VLOG(4) << "This: " << &(*time_events)[i][j]
                << ", Parent: " << (*time_events)[i][j].parent() << std::endl;
        if ((*time_events)[i][j].role() == platform::EventRole::kInnerOp) {
          VLOG(4) << "role kInnerOp" << std::endl;
        } else if ((*time_events)[i][j].role() ==
                   platform::EventRole::kUniqueOp) {
          VLOG(4) << "role kUniqueOp" << std::endl;
        } else if ((*time_events)[i][j].role() ==
                   platform::EventRole::kOrdinary) {
          VLOG(4) << "role kOrdinary" << std::endl;
        } else if ((*time_events)[i][j].role() ==
                   platform::EventRole::kSpecial) {
          VLOG(4) << "role kSpecial" << std::endl;
        }

        if ((*time_events)[i][j].type() == platform::EventType::kPopRange) {
          VLOG(4) << "type kPopRange" << std::endl;
        } else if ((*time_events)[i][j].type() ==
                   platform::EventType::kPushRange) {
          VLOG(4) << "type kPushRange" << std::endl;
        } else if ((*time_events)[i][j].type() == platform::EventType::kMark) {
          VLOG(4) << "type kMark" << std::endl;
        }
        VLOG(4) << std::endl;
      }
    }
  }
  if (mem_events != nullptr) {
    for (size_t i = 0; i < mem_events->size(); ++i) {
      for (size_t j = 0; j < (*mem_events)[i].size(); ++j) {
        VLOG(4) << "Print mem event (" << i << ", " << j << ")" << std::endl;
        VLOG(4) << (*mem_events)[i][j].annotation() << std::endl;
      }
    }
  }
}
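
// Note (editorial assumption, not in the PR): the dumps above go to VLOG
// level 4, so with Paddle's glog-based logging they typically appear only
// when verbose logging is enabled, e.g. by setting GLOG_v=4.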

std::string ToLowerCopy(const std::string& in) {
  std::string out(in);
  std::transform(out.begin(), out.end(), out.begin(),
                 [](unsigned char c) { return std::tolower(c); });
  return out;
}

CostData CostModel::ProfileMeasure(
    const ProgramDesc& main_program, const ProgramDesc& startup_program,
    const std::string& device,
    const std::vector<std::string>& fetch_cost_list) const {
  // Currently fetch_cost_list is unused
  // TODO(zhhsplendid): support different fetch data

  platform::ProfilerState profiler_state;
  platform::Place place;

  std::string device_lower_case = ToLowerCopy(device);
  if (device_lower_case == "cpu") {
    profiler_state = platform::ProfilerState::kCPU;
    place = platform::CPUPlace();
  } else if (device_lower_case == "gpu") {
    profiler_state = platform::ProfilerState::kAll;
    place = platform::CUDAPlace();
  } else {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Device %s is not supported in CostModel now", device));
  }

  Executor executor(place);
  Scope scope;
  executor.Run(startup_program, &scope, /*block_id = */ 0);

  // TODO(zhhsplendid): handle the case that Profiler is already enabled
  SetTracerOption(platform::TracerOption::kAllOpDetail);
  EnableProfiler(profiler_state);
  executor.Run(main_program, &scope, /*block_id = */ 0);

  std::unique_ptr<std::vector<std::vector<Event>>> time_events(
      new std::vector<std::vector<Event>>());
  std::unique_ptr<std::vector<std::vector<MemEvent>>> mem_events(
      new std::vector<std::vector<MemEvent>>());

  CompleteProfilerEvents(/*tracer_profile= */ nullptr, time_events.get(),
                         mem_events.get());

  // TODO(zhhsplendid): remove debug vlog after this series of work
  PrintEvents(time_events.get(), mem_events.get());

  // Convert events to cost data
  CostData cost_data;
  cost_data.SetCostData(main_program, *time_events);

  return cost_data;
}

} // namespace framework
} // namespace paddle
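
As promised above, here is a minimal, self-contained sketch of the push/pop matching idea used in SetCostData. The Evt struct, the timestamps, and the op names are hypothetical stand-ins for platform::Event and a real program; this is a sketch of the technique, not Paddle's API.

// build: g++ -std=c++11 push_pop_sketch.cc
#include <iostream>
#include <string>
#include <vector>

enum class EvtType { kPushRange, kPopRange };
// Reduced to the fields the matching loop needs; not Paddle's real Event.
struct Evt {
  std::string name;
  EvtType type;
  double ts_ms;
};

int main() {
  // Profiler output for two ops on the main thread, in issue order.
  std::vector<Evt> events = {{"relu", EvtType::kPushRange, 0.0},
                             {"relu", EvtType::kPopRange, 1.5},
                             {"matmul", EvtType::kPushRange, 1.5},
                             {"matmul", EvtType::kPopRange, 4.0}};
  // Op types in program order, mirroring global_block.Op(i)->Type().
  std::vector<std::string> op_types = {"relu", "matmul"};

  size_t idx = 0;
  for (size_t i = 0; i < op_types.size(); ++i) {
    // Scan forward to the push event for this op.
    while (idx < events.size() && !(events[idx].name == op_types[i] &&
                                    events[idx].type == EvtType::kPushRange)) {
      ++idx;
    }
    if (idx >= events.size()) break;
    size_t push = idx;
    // Scan forward to the matching pop event.
    while (idx < events.size() && !(events[idx].name == op_types[i] &&
                                    events[idx].type == EvtType::kPopRange)) {
      ++idx;
    }
    if (idx >= events.size()) break;
    std::cout << op_types[i] << ": " << events[idx].ts_ms - events[push].ts_ms
              << " ms\n";
  }
  return 0;
}

Run on the sample data it prints "relu: 1.5 ms" and "matmul: 2.5 ms"; the single forward-scanning cursor is why events with the wrong push/pop ordering make SetCostData skip an op and return false.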
paddle/fluid/framework/ir/cost_model.h (new file) @@ -0,0 +1,85 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/variant.h"

namespace paddle {
namespace framework {

class CostData {
 public:
  CostData() {}

  ~CostData();

  // Support global block only
  // TODO(zhhsplendid): add support for sub-block
  double GetOpTimeMs(int op_id) const;
  double GetOpMemoryBytes(int op_id) const;
  double GetWholeTimeMs() const;
  double GetWholeMemoryBytes() const;

  const ir::Graph* GetGraph() const;
  const ProgramDesc* GetProgram() const;

  // Support Time Event only
  // TODO(zhhsplendid): add memory
  bool SetCostData(
      const ProgramDesc& program,
      const std::vector<std::vector<platform::Event>>& time_events);

  static const double NOT_MEASURED;

 private:
  ir::Graph* graph_{nullptr};
  ProgramDesc* program_{nullptr};
  std::map<int, double> op_time_ms_;  // from Op Node id to time
  std::map<int, double>
      op_memory_bytes_;  // from Op Node id to total memory bytes
  std::map<int, double> comm_;  // from Op Node id to communicate cost
  double whole_time_ms_{
      NOT_MEASURED};  // time cost of the whole program or graph
  double whole_memory_bytes_{
      NOT_MEASURED};  // memory cost of the whole program or graph
  double whole_comm_{
      NOT_MEASURED};  // communication cost of the whole program or graph
};

class CostModel {
 public:
  CostModel() {}
  ~CostModel() {}

  CostData ProfileMeasure(
      const ProgramDesc& main_program, const ProgramDesc& startup_program,
      const std::string& device,
      const std::vector<std::string>& fetch_cost_list) const;
};

} // namespace framework
} // namespace paddle
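
For orientation, a hedged usage sketch of the API declared above, assuming main_program and startup_program are ProgramDescs built elsewhere; the function name MeasureExample is mine, not part of the PR.

#include <iostream>

#include "paddle/fluid/framework/ir/cost_model.h"

void MeasureExample(const paddle::framework::ProgramDesc& main_program,
                    const paddle::framework::ProgramDesc& startup_program) {
  paddle::framework::CostModel model;
  // fetch_cost_list is accepted but currently unused by ProfileMeasure.
  paddle::framework::CostData data =
      model.ProfileMeasure(main_program, startup_program, "cpu", {});
  // Whole-program metrics default to NOT_MEASURED (-1), so a negative
  // value means the events could not be converted to costs.
  if (data.GetWholeTimeMs() >= 0) {
    std::cout << "whole-program time: " << data.GetWholeTimeMs() << " ms\n";
    std::cout << "first op time: " << data.GetOpTimeMs(0) << " ms\n";
  }
}

Note that the per-op getters go through std::map::at, so querying an op id that was never measured throws std::out_of_range rather than returning NOT_MEASURED.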
Review comment (on the int op_id getters): I think it's better to use size_t to replace int, because global_block.OpSize() returns size_t.

Reply: A little background: we will also support Graph in the future, and then there will be int node ids. We are not sure whether a graph id can be negative.
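
To make that trade-off concrete, here is a small self-contained sketch (mine, not part of the PR): size_t matches what OpSize() returns, while a signed id keeps room for negative sentinels if future graph node ids need them.

#include <cstddef>
#include <map>

int main() {
  std::map<int, double> op_time_ms;  // signed keys, as in the PR
  std::size_t op_size = 3;           // the type OpSize() returns
  for (std::size_t i = 0; i < op_size; ++i) {
    // Narrowing size_t -> int is fine for realistic op counts, but
    // -Wsign-conversion-style warnings will flag it.
    op_time_ms[static_cast<int>(i)] = 0.0;
  }
  const int kUnknownId = -1;  // a sentinel that only a signed id allows
  return op_time_ms.count(kUnknownId) ? 1 : 0;
}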