Merged

Changes from all commits · 24 commits
cf37296
Rearranged Eager AutoCodeGen directory structure
jim19930609 Dec 3, 2021
9e0e12b
Removed USE_OP in Eager AutoCodeGen
jim19930609 Dec 3, 2021
2ee135d
Enabled generation for Operators without Grad/Inputs/Outputs
jim19930609 Dec 3, 2021
9bc9793
Merge branch 'develop' into eager_dygraph_codegen_debug
jim19930609 Dec 3, 2021
e9fa346
Resolved operators without input
jim19930609 Dec 3, 2021
e25c061
Fixed merge conflicts
jim19930609 Dec 6, 2021
6988b24
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 6, 2021
7a6fd5f
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 6, 2021
d19cd85
Enabled Eager AutoCodeGen for 10+ more operators
jim19930609 Dec 7, 2021
d41a473
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 7, 2021
577a658
Refactored Eager AutoCodeGen with more organized helper objects
jim19930609 Dec 7, 2021
a4b7987
Enabled Eager AutoCodeGen for operators with multiple OpBases
jim19930609 Dec 8, 2021
57b53b2
Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input A…
jim19930609 Dec 8, 2021
2321d49
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 8, 2021
30ab71e
Handled Dispensable Inputs/Outputs in Eager AutoCodeGen
jim19930609 Dec 8, 2021
60f9108
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 9, 2021
15ccd63
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 9, 2021
d60f20a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 9, 2021
ccd0e7c
Adjusted function generation/call between Python-C API & Dygraph API
jim19930609 Dec 9, 2021
ba6488d
Synchronized auto-generated Python-C API with Dygraph Forward Functions
jim19930609 Dec 9, 2021
0a59d97
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 13, 2021
f1a0b9b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 14, 2021
65e0036
Added safe_initialized interface to EagerTensor for use in processing…
jim19930609 Dec 14, 2021
ea3ef49
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Dec 14, 2021
37 changes: 11 additions & 26 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1174,7 +1174,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
} else {
const char* FWD_INS_CONTENT_TEMPLATE =
" if(%s.initialized()) "
" if(%s.safe_initialized()) "
"ins[\"%s\"] = egr::EagerUtils::SyncToVars(%s)\n;";
generated_function_body += paddle::string::Sprintf(
FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
@@ -1196,25 +1196,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
// in form of shared_ptr<EagerTensor>/vector<shared_ptr<EagerTensor>>
if (output.duplicable()) {
const char* FWD_NUM_ARG_TEMPLATE =
", std::vector<std::shared_ptr<egr::EagerTensor>>& %s";
", std::vector<egr::EagerTensor>& %s";
std::string arg_str =
paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
dygraph_function_args_str += arg_str;

const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
} else {
const char* FWD_NUM_ARG_TEMPLATE =
", std::shared_ptr<egr::EagerTensor>& %s";
const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor& %s";
std::string arg_str =
paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
dygraph_function_args_str += arg_str;

const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", {%s} },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
}
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::TrySyncToVars(&%s) },";
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);

} else {
if (output.duplicable()) {
@@ -1557,22 +1553,11 @@ static std::string GenerateGradNodeCCContents(
"fwd_outputs_name_pos_map"));

size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name);
std::string grad_ptr_name = fwd_name + "_ptrs";
const char* GET_GRADS_PTR_TEMPLATE =
" std::vector<std::shared_ptr<egr::EagerTensor>> %s;\n"
" for(const auto& t : grads[%d]) {\n "
"%s.emplace_back(std::move(std::make_shared<egr::EagerTensor>(t))"
");"
"\n }\n";
std::string grads_ptr_str =
paddle::string::Sprintf(GET_GRADS_PTR_TEMPLATE, grad_ptr_name,
grads_position, grad_ptr_name);
generated_grad_function_body += grads_ptr_str;
generated_grad_function_body += "\n";

const char* GRAD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },";

const char* GRAD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::EagerUtils::SyncToVars(grads[%d]) },";
outs_contents_str += paddle::string::Sprintf(
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grad_ptr_name);
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position);

} else {
size_t fwd_input_position = fwd_inputs_name_pos_map.at(fwd_name);
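Note: taken together, the template changes above mean the generated dygraph forward functions now accept outputs as plain EagerTensor references and sync them lazily. A minimal sketch of the emitted shape, assuming a hypothetical op "my_op" with one dispensable input "X" and one output "Out" (illustrative only; the real text is assembled by paddle::string::Sprintf from the templates above):

static void my_op_dygraph_function(egr::EagerTensor& X, egr::EagerTensor& Out) {
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerTensor>>> ins;
  // Dispensable input: fed only when it holds data on either the pt
  // tensor side or the legacy Variable side.
  if (X.safe_initialized()) ins["X"] = egr::EagerUtils::SyncToVars(X);

  // Outputs are synced only if already initialized, then wrapped.
  std::map<std::string, std::vector<std::shared_ptr<egr::EagerTensor>>> outs = {
      { "Out", egr::EagerUtils::TrySyncToVars(&Out) },
  };
  // ... trace the op with ins/outs and write results back into Out ...
}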
4 changes: 4 additions & 0 deletions paddle/fluid/eager/eager_tensor.h
@@ -152,6 +152,10 @@ class EagerTensor final {
*/
bool initialized() const { return tensor_->initialized(); }

+ bool safe_initialized() const {
+ return initialized() || var_.IsInitialized();
+ }

/**
* @description: Reset the Tensor implementation
* @param None
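The new check widens initialized() to the legacy Variable path, which is what the generated code above relies on for dispensable inputs. An illustrative-only helper, assuming a tensor whose fluid Variable was filled by a legacy op while its pt tensor was not:

// Hypothetical helper; relies only on the interface shown above.
bool ShouldFeedAsInput(const egr::EagerTensor& t) {
  // t.initialized() inspects just the pt tensor and would report false;
  // t.safe_initialized() also consults var_.IsInitialized(), so tensors
  // carried by the legacy Variable path are not silently skipped.
  return t.safe_initialized();
}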
24 changes: 24 additions & 0 deletions paddle/fluid/eager/utils.cc
@@ -135,6 +135,30 @@ std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::SyncToVars(
return res;
}

+ static std::shared_ptr<egr::EagerTensor> TrySyncToVar(
+ egr::EagerTensor* tensor) {
+ if (tensor->initialized() || tensor->Var().IsInitialized()) {
+ tensor->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR);
+ }
+ return std::make_shared<EagerTensor>(*tensor);
+ }

+ std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
+ egr::EagerTensor* tensor) {
+ return {TrySyncToVar(tensor)};
+ }

+ std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
+ std::vector<egr::EagerTensor>* tensors) {
+ std::vector<std::shared_ptr<EagerTensor>> res;
+ size_t num = tensors->size();
+ res.reserve(num);
+ for (size_t i = 0; i < num; i++) {
+ res.emplace_back(TrySyncToVar(&(*tensors)[i]));
+ }
+ return res;
+ }

/* ---- VarBase -> Tensor ---- */
std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::SyncToTensors(
const egr::EagerTensor& tensor) {
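A minimal usage sketch for the new overloads (hypothetical tensors; assumes the eager headers these definitions live in, and that EagerTensor is default-constructible):

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/utils.h"

void Demo() {
  egr::EagerTensor out;                   // single, possibly uninitialized
  std::vector<egr::EagerTensor> outs(2);  // duplicable output slots

  // Unlike SyncToVars, TrySyncToVars syncs only tensors that already hold
  // data (pt tensor or fluid Variable); uninitialized ones pass through
  // untouched, merely wrapped in shared_ptrs.
  auto single = egr::EagerUtils::TrySyncToVars(&out);
  auto multi = egr::EagerUtils::TrySyncToVars(&outs);
}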
5 changes: 5 additions & 0 deletions paddle/fluid/eager/utils.h
@@ -145,6 +145,11 @@ class EagerUtils {
const std::shared_ptr<GradNodeBase>& grad_node);

// Intermediate; remove this once we don't need legacy
+ static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
+ egr::EagerTensor* tensor);
+ static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
+ std::vector<egr::EagerTensor>* tensors);

static std::vector<std::shared_ptr<egr::EagerTensor>> SyncToVars(
const egr::EagerTensor& tensor);
static std::vector<std::shared_ptr<egr::EagerTensor>> SyncToVars(
4 changes: 2 additions & 2 deletions paddle/fluid/pybind/CMakeLists.txt
@@ -181,7 +181,7 @@ if(WITH_PYTHON)
":retry\n"
"ECHO eager_op_function_generator run %build_times% time\n"
"taskkill /f /im eager_op_function_generator.exe 2>NUL\n"
"${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n"
"${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt\n"
"if %ERRORLEVEL% NEQ 0 (\n"
" set /a build_times=%build_times%+1\n"
" if %build_times% GEQ 10 (\n"
@@ -256,7 +256,7 @@ if(WITH_PYTHON)
add_custom_command(OUTPUT ${eager_impl_file}
COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:."
"${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator"
"${tmp_eager_impl_file}"
"${tmp_eager_impl_file}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file}
COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}"
DEPENDS ${EAGER_OP_IMPL_DEPS}
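Schematically, both the Windows retry loop and the Linux custom command now invoke the generator with two arguments (paths abbreviated, illustrative):

eager_op_function_generator <tmp_eager_impl_file> <source>/paddle/fluid/eager/auto_code_generator/op_list.txt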
143 changes: 20 additions & 123 deletions paddle/fluid/pybind/eager_op_function_generator.cc
@@ -32,126 +32,7 @@
#endif
#include "paddle/fluid/pybind/op_function_generator.h"

- std::set<std::string> gen_list = {
- "sigmoid", "matmul_v2", "reduce_sum", "elementwise_add", "rsqrt",
- "multihead_matmul", "addmm", "gru", "round", "push_dense", "rank_attention",
- "fused_embedding_fc_lstm", "where_index", "bicubic_interp", "arg_min",
- "tile", "bilinear_tensor_product", "ctc_align",
- "pow2_decay_with_linear_warmup", "marker", "split", "fc",
- "load", "elementwise_max", "adadelta",
- "tan",
- "fsp", "where", "logical_xor", "multiclass_nms3", "one_hot_v2",
- "sequence_softmax", "affine_channel", "triangular_solve",
- "sequence_topk_avg_pooling", "space_to_depth", "reverse",
- "fused_embedding_eltwise_layernorm", "expand_v2", "lgamma", "solve",
- "deformable_psroi_pooling", "instance_norm", "decode_jpeg", "gather_nd",
- "reduce_prod", "matrix_rank", "asin", "lstmp", "iou_similarity",
- "huber_loss", "one_hot", "sequence_slice", "lookup_table", "softplus",
- "depthwise_conv2d", "fused_fc_elementwise_layernorm",
- "sigmoid_cross_entropy_with_logits", "exp", "scatter", "equal_all",
- "searchsorted", "fusion_squared_mat_sub", "unique", "log", "conv_shift",
- "smooth_l1_loss", "linear_interp_v2",
- "temporal_shift", "nce", "mv", "proximal_gd", "memcpy_h2d",
- "add_position_encoding", "cosh", "hash", "grad_add", "sign", "prelu",
- "linspace", "fill_diagonal", "logsigmoid", "load_combine", "fetch_v2",
- "randperm", "sequence_scatter", "partial_sum", "relu6", "conv3d",
- "lstm_unit", "not_equal", "transpose2", "uniform_random_batch_size_like",
- "unfold", "lrn", "softmax_with_cross_entropy", "isfinite_v2", "bernoulli",
- "max_pool3d_with_index", "gaussian_random", "flatten2",
- "cvm", "adamax", "masked_select", "range", "bitwise_not", "trace",
- "multinomial", "modified_huber_loss", "roll", "squared_l2_distance",
- "conv3d_transpose", "share_data", "fake_quantize_abs_max",
- "unique_with_counts", "fill", "concat", "fill_zeros_like",
- "hierarchical_sigmoid", "isinf_v2", "squeeze", "multiclass_nms2",
- "bpr_loss", "fft_c2c", "bicubic_interp_v2", "reshape", "coalesce_tensor",
- "roi_align", "reshape2", "reduce_any", "unstack", "scatter_nd_add",
- "sequence_reshape", "bilateral_slice", "fill_any_like", "empty",
- "pad_constant_like", "pool2d", "size", "imag", "eigh", "stack",
- "dgc_momentum",
- "generate_proposals_v2", "bitwise_or", "gru_unit",
- "sampling_id", "unsqueeze2",
- "sequence_enumerate", "fusion_seqconv_eltadd_relu", "bce_loss",
- "generate_proposal_labels", "im2sequence", "isinf", "adagrad",
- "linear_chain_crf", "retinanet_target_assign", "fusion_group",
- "teacher_student_sigmoid_loss", "random_crop", "lookup_table_v2",
- "detection_map", "l1_norm", "sqrt", "fused_elemwise_activation",
- "slogdeterminant", "share_buffer", "bitwise_and", "diag_embed", "unbind",
- "dropout",
- "beam_search", "log_loss", "greater_than", "kron", "sigmoid_focal_loss",
- "rmsprop", "conv2d", "uniform_random_inplace", "maxout", "linear_interp",
- "auc", "logical_or",
- "acos", "unpool", "cumprod", "sample_logits", "crop_tensor",
- "deformable_conv", "generate_mask_labels", "locality_aware_nms",
- "expand_as", "matrix_power", "greater_equal", "generate_proposals",
- "bilinear_interp", "inplace_abn", "softshrink", "mul", "data_norm",
- "get_tensor_from_selected_rows", "spp", "floor", "gelu",
- "retinanet_detection_output", "push_dense", "silu", "sequence_erase",
- "real", "nearest_interp_v2", "dgc_clip_by_norm", "squeeze2",
- "strided_slice", "conj", "precision_recall", "save",
- "fusion_seqexpand_concat_fc", "fake_quantize_range_abs_max",
- "depthwise_conv2d_transpose", "positive_negative_pair", "square",
- "var_conv_2d", "log1p", "fused_softmax_mask_upper_triangle", "clip_by_norm",
- "atan2", "box_decoder_and_assign", "fft_r2c", "roi_pool", "overlap_add",
- "fill_constant_batch_size_like", "fill_any", "dequantize_log",
- "max_pool2d_with_index", "pad3d", "norm", "viterbi_decode", "mish",
- "box_coder", "flatten", "elementwise_mod", "margin_cross_entropy",
- "logical_and", "pow", "stanh", "label_smooth", "merged_momentum",
- "ascend_trigger", "fused_feedforward", "rpn_target_assign",
- "roi_perspective_transform", "expand", "prroi_pool", "pool3d", "memcpy",
- "distribute_fpn_proposals", "frame", "bincount", "shape", "group_norm",
- "resnet_unit", "sequence_expand_as", "cos_sim", "eigvals", "save_combine",
- "class_center_sample", "read_file", "isfinite", "arg_max", "equal",
- "fake_dequantize_max_abs", "qr", "anchor_generator", "layer_norm",
- "merge_selected_rows", "less_equal",
- "fusion_lstm", "lars_momentum", "hard_sigmoid", "isnan",
- "elementwise_floordiv", "correlation", "histogram", "gather_tree",
- "segment_pool",
- "fusion_repeated_fc_relu", "nop",
- "expand_as_v2", "filter_by_instag", "nll_loss", "dot", "scale", "ncclBcast",
- "shuffle_batch", "ncclReduce", "diag", "multiplex", "leaky_relu",
- "allclose",
- "elementwise_pow", "prior_box", "p_norm", "unique_consecutive", "lod_reset",
- "pad", "sequence_conv", "log10", "set_value", "bitwise_xor", "center_loss",
- "randint", "attention_lstm", "uniform_random", "slice", "meshgrid",
- "hard_swish", "sin", "mean_iou", "pad2d", "inverse", "spectral_norm",
- "shuffle_channel", "psroi_pool", "seed", "ceil", "eig", "reduce_min", "cos",
- "ncclAllReduce", "cudnn_lstm", "digamma", "assign_value", "increment",
- "tdm_sampler", "fused_softmax_mask", "sequence_reverse", "eigvalsh",
- "diagonal", "trunc", "log2", "tanh", "yolov3_loss", "graph_send_recv",
- "atan", "less_than", "unsqueeze", "crf_decoding", "log_softmax", "ftrl",
- "matrix_nms", "top_k_v2", "cast", "tanh_shrink", "hard_shrink",
- "multiclass_nms", "fusion_transpose_flatten_concat", "sequence_unpad",
- "fused_elemwise_add_activation", "frobenius_norm", "crop", "cross_entropy2",
- "skip_layernorm", "tdm_child", "fused_embedding_seq_pool", "erf",
- "conv2d_inception_fusion", "trilinear_interp", "logsumexp",
- "fusion_seqpool_concat", "alloc_float_status", "sequence_concat",
- "fusion_seqpool_cvm_concat", "similarity_focus", "argsort",
- "sequence_expand",
- "fused_bn_add_activation", "bilinear_interp_v2", "clip",
- "deformable_conv_v1", "hinge_loss", "determinant", "conv2d_transpose",
- "memcpy_d2h", "softsign",
- "broadcast_tensors", "grid_sampler", "fft_c2r", "pyramid_hash",
- "multi_dot", "sequence_pool", "transpose", "top_k", "dist", "affine_grid",
- "gaussian_random_batch_size_like", "fake_channel_wise_dequantize_max_abs",
- "reciprocal", "sequence_mask", "fill_diagonal_tensor", "abs",
- "partial_concat", "elu", "index_select", "row_conv", "cross",
- "elementwise_mul", "decayed_adagrad", "bipartite_match",
- "fake_quantize_moving_average_abs_max", "mine_hard_examples",
- "target_assign", "lstm", "truncated_gaussian_random", "match_matrix_tensor",
- "elementwise_div", "kldiv_loss", "cumsum", "sum", "proximal_adagrad",
- "shard_index", "selu", "mean", "gumbel_softmax", "sequence_pad",
- "tree_conv", "assign", "flatten_contiguous_range", "tril_triu", "brelu",
- "celu", "reduce_mean", "sinh", "rank_loss", "reduce_max", "fusion_gru",
- "fill_zeros_like2", "expm1", "squared_l2_norm", "elementwise_sub",
- "margin_rank_loss", "faster_tokenizer", "relu", "is_empty", "reduce_all",
- "edit_distance", "bmm", "yolo_box", "soft_relu", "density_prior_box", "eye",
- "swish", "cross_entropy", "dpsgd", "cholesky", "batch_fc", "nearest_interp",
- "gather", "trilinear_interp_v2", "box_clip", "isnan_v2", "softmax",
- "conv2d_fusion", "fused_batch_norm_act",
- "index_sample", "elementwise_min", "logical_not", "collect_fpn_proposals",
- "pixel_shuffle", "thresholded_relu", "polygon_box_transform",
- "lookup_table_dequant", "warpctc", "fake_channel_wise_quantize_abs_max",
- "dequantize_abs_max", "svd", "flip"};
+ std::set<std::string> gen_list = {};

// clang-format off
const char* OUT_INITIALIZER_TEMPLATE =
@@ -348,7 +229,7 @@ std::string GenerateOpFunctionsBody(
ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type,
out_name, arg_idx++, dispensable);

- // call_api_str += out_name + ", ";
+ call_api_str += out_name + ", ";
} else {
// There are few Operators that have duplicable output, like `Out` in
// split op. We need to specify the number of variables for the
@@ -448,12 +329,28 @@
return std::make_tuple(op_function_list, bind_function_list);
}

+ static void CollectOperatorsToCodeGen(const std::string& op_list_path) {
+ std::string line;
+ std::ifstream op_list_file(op_list_path);
+ if (op_list_file.is_open()) {
+ while (getline(op_list_file, line)) {
+ gen_list.insert(line);
+ }
+ op_list_file.close();
+ } else {
+ PADDLE_THROW(
+ paddle::platform::errors::Fatal("Unable to open op_list.txt file"));
+ }
+ }

int main(int argc, char* argv[]) {
- if (argc != 2) {
- std::cerr << "argc must be 2" << std::endl;
+ if (argc != 3) {
+ std::cerr << "argc must be 3" << std::endl;
return -1;
}

+ CollectOperatorsToCodeGen(argv[2]);

#ifdef PADDLE_WITH_ASCEND_CL
auto ascend_ptr = paddle::framework::AscendInstance::GetInstance();
ascend_ptr->InitGEForUT();
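With this change the generated operator set is driven entirely by op_list.txt, which CollectOperatorsToCodeGen above reads one operator name per line. A plausible fragment (names taken from the previously hard-coded gen_list):

sigmoid
matmul_v2
reduce_sum
elementwise_add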