5 changes: 5 additions & 0 deletions paddle/fluid/inference/analysis/argument.h
@@ -371,6 +371,11 @@ struct Argument {
// cinn compiler related
DECL_ARGUMENT_FIELD(use_cinn_compiler, UseCinnCompiler, bool);

// custom device
DECL_ARGUMENT_FIELD(use_custom_device, UseCustomDevice, bool);
DECL_ARGUMENT_FIELD(custom_device_type, CustomDeviceType, std::string);
DECL_ARGUMENT_FIELD(custom_device_id, CustomDeviceId, int);

private:
std::unordered_set<std::string> valid_fields_;
};
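Note: the setter/getter/validity names generated by DECL_ARGUMENT_FIELD are what the other files in this PR rely on. A minimal sketch of the convention (the surrounding setup is illustrative only; only the generated names come from this PR):

```cpp
// DECL_ARGUMENT_FIELD(use_custom_device, UseCustomDevice, bool) generates a
// SetUseCustomDevice() setter, a use_custom_device() getter, and a
// use_custom_device_valid() check, used elsewhere in this PR like this:
Argument argument;
argument.SetUseCustomDevice(true);
argument.SetCustomDeviceType("my_device");  // plugin device type (example name)
argument.SetCustomDeviceId(0);

if (argument.use_custom_device_valid() && argument.use_custom_device()) {
  // e.g. dispatch to the custom-device parameter sync path
}
```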
paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -14,6 +14,7 @@

#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h"

#include <cstdlib>
#include <string>
#include <unordered_set>

@@ -26,6 +27,11 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/common/data_type.h"

DEFINE_bool(
    custom_model_save_cpu,
    false,
    "Keep the old mode for developers: the model is saved on the cpu, not "
    "on the device.");
Contributor
Based on how it is used later in the code, I suggest changing the custom_model_save_cpu flag to a bool type; DEFINE_bool is enough.

Contributor Author

@qili93 Changed it to bool.
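As a side note, a flag defined with DEFINE_bool can also be flipped programmatically, e.g. in a test that exercises the old CPU-resident mode. A minimal sketch (code outside the defining .cc first needs a matching DECLARE_bool; the helper is hypothetical):

```cpp
#include "gflags/gflags.h"

// The flag is defined in ir_params_sync_among_devices_pass.cc above;
// other translation units must declare it before use.
DECLARE_bool(custom_model_save_cpu);

// Hypothetical helper for a test or debugging session: opt back into the
// old mode, keeping model parameters on the CPU instead of the device.
void UseOldCpuSaveMode() { FLAGS_custom_model_save_cpu = true; }
```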


namespace paddle {
namespace inference {
namespace analysis {
@@ -71,9 +77,9 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToNpu(Argument *argument) {
}
}
}
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
// The parameters are on the cpu, therefore, synchronization is not necessary.
if (!argument->use_gpu()) return;
@@ -148,21 +154,83 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) {
}
}
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
void IrParamsSyncAmongDevicesPass::CopyParamsToCustomDevice(
    Argument *argument) {
  if (!argument->use_custom_device()) return;

  // In the old mode, the model is kept on the cpu rather than on the device.
  // That mode is not allowed for OpenCL custom devices.
  if (argument->custom_device_type() == "OpenCL") {
    PADDLE_ENFORCE_EQ(
        FLAGS_custom_model_save_cpu,
        false,
        phi::errors::InvalidArgument(
            "'FLAGS_custom_model_save_cpu = true' is only for developers "
            "who have not completed custom device memory settings. Setting "
            "it to true keeps model memory on the cpu and makes inference "
            "slower."));
  }

  if (FLAGS_custom_model_save_cpu) return;

  auto &graph = argument->main_graph();
  std::vector<std::string> repetitive_params;

  if (graph.Has(framework::ir::kRepetitiveParamAttr))
    repetitive_params = graph.Get<std::vector<std::string>>(
        framework::ir::kRepetitiveParamAttr);

  LOG(INFO) << "Sync params from CPU to CustomDevice "
            << argument->custom_device_type() << "/"
            << argument->custom_device_id();

  platform::Place place = platform::CustomPlace(argument->custom_device_type(),
                                                argument->custom_device_id());
  auto *scope = argument->scope_ptr();
  std::vector<std::string> all_vars = scope->LocalVarNames();

  for (auto &var_name : all_vars) {
    auto *var = scope->FindLocalVar(var_name);
    PADDLE_ENFORCE_NOT_NULL(
        var,
        platform::errors::PreconditionNotMet("The var should not be nullptr"));

    if (var->IsType<phi::DenseTensor>()) {
      auto *t = var->GetMutable<phi::DenseTensor>();

      // Stage the data in a temporary CPU tensor, release the old
      // allocation, then copy it to the custom device place.
      platform::CPUPlace cpu_place;
      phi::DenseTensor temp_tensor;
      temp_tensor.Resize(t->dims());

      paddle::framework::TensorCopySync(*t, cpu_place, &temp_tensor);
      t->clear();
      paddle::framework::TensorCopySync(temp_tensor, place, t);
    }
  }
}
#endif

void IrParamsSyncAmongDevicesPass::RunImpl(Argument *argument) {
  PADDLE_ENFORCE_EQ(
      argument->scope_valid(),
      true,
      platform::errors::PreconditionNotMet("The scope field should be valid"));

#ifdef PADDLE_WITH_ASCEND_CL
  if (argument->use_npu_valid()) {
    CopyParamsToNpu(argument);
  }
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (argument->use_gpu_valid()) {
    CopyParamsToGpu(argument);
  }
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
  if (argument->use_custom_device_valid()) {
    CopyParamsToCustomDevice(argument);
  }
#endif
}
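As a quick sanity check, the sync could be verified in a test along these lines (a minimal sketch, assuming the pass has already run over the Argument's scope and that glog's CHECK is available; this is not part of the PR):

```cpp
// After RunImpl, every DenseTensor parameter in the scope should live on the
// custom device place rather than on the CPU.
for (auto &name : scope->LocalVarNames()) {
  auto *var = scope->FindLocalVar(name);
  if (var != nullptr && var->IsType<phi::DenseTensor>()) {
    auto &t = var->Get<phi::DenseTensor>();
    CHECK(t.place().GetType() == phi::AllocationType::CUSTOM);
  }
}
```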

paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -37,9 +37,15 @@ class IrParamsSyncAmongDevicesPass : public AnalysisPass {
private:
#ifdef PADDLE_WITH_ASCEND_CL
void CopyParamsToNpu(Argument *argument);
#endif

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
void CopyParamsToGpu(Argument *argument);
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
void CopyParamsToCustomDevice(Argument *argument);
#endif
};

} // namespace analysis
9 changes: 9 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -1242,6 +1242,15 @@ void AnalysisPredictor::PrepareArgument() {
}
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
argument_.SetUseCustomDevice(config_.use_custom_device());
if (config_.use_custom_device()) {
LOG(INFO) << "CustomDevice is enabled";
argument_.SetCustomDeviceType(config_.custom_device_type());
argument_.SetCustomDeviceId(config_.custom_device_id());
}
#endif

auto *pass_builder = config_.pass_builder();
// TODO(inference): Need to reconstruct the pass_builder, pass should be
// processed in a single
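For context, the user-facing setup that drives this branch looks roughly like the sketch below (assumes a registered custom device plugin; the device type string "my_device" and the model path are placeholders, and the Config/CreatePredictor calls follow the paddle_infer C++ API):

```cpp
#include "paddle_inference_api.h"  // header name may vary by install layout

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // placeholder model path
  config.EnableCustomDevice("my_device", /*device_id=*/0);

  // PrepareArgument() then forwards use_custom_device/type/id into the
  // analysis Argument, and IrParamsSyncAmongDevicesPass copies the weights
  // onto the custom device before inference.
  auto predictor = paddle_infer::CreatePredictor(config);
  return 0;
}
```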