Skip to content

Commit

Permalink
Incremental compilation optimization of phi (PaddlePaddle#59811)
Browse files Browse the repository at this point in the history
* mv phi/ops/.cc file to fluid

* fix phi/kernels/compile bug

* optimization cmake code

* optimization compilation of phi

* tmp for gen fpaintb.py

* refine gen mxigemm py

* refine gen mxigemm py

* optimization compilation of phi

* optimization compilation of phi

* optimization compilation of phi

* optimization compilation of phi

---------

Co-authored-by: wwbitejotunn <wang_bojun@outlook.com>
  • Loading branch information
risemeup1 and wwbitejotunn authored Dec 11, 2023
1 parent c140f91 commit b3d4ce3
Show file tree
Hide file tree
Showing 95 changed files with 122 additions and 71 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ paddle/fluid/operators/generated_op*.cc
paddle/fluid/operators/generated_sparse_op.cc
paddle/fluid/operators/generated_static_op.cc
paddle/fluid/operators/generated_fused_op.cc
paddle/phi/ops/compat/generated_*.cc
paddle/fluid/operators/ops_signature/generated_*.cc
paddle/phi/api/yaml/parsed_apis/
paddle/fluid/operators/generator/parsed_ops/
paddle/fluid/pybind/tmp_eager_op_function_impl.h
Expand All @@ -95,9 +95,11 @@ paddle/fluid/framework/__init__.py
paddle/phi/api/profiler/__init__.py
python/paddle/incubate/fleet/parameter_server/pslib/ps_pb2.py
paddle/phi/kernels/fusion/cutlass/conv2d/generated/*
paddle/phi/kernels/fusion/cutlass/conv2d/generated_tmp/*
python/paddle/base/incubate/fleet/parameter_server/pslib/ps_pb2.py
paddle/fluid/ir_adaptor/translator/op_compat_info.cc
paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen/*
paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp/*
paddle/fluid/pybind/static_op_function.*
paddle/fluid/pybind/ops_api.cc
paddle/fluid/pir/dialect/operator/ir/pd_api.*
Expand Down
50 changes: 50 additions & 0 deletions cmake/operators.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -734,3 +734,53 @@ function(prune_pybind_h)
endif()
endforeach()
endfunction()

# append_op_util_declare(TARGET)
#
# Reads the source file TARGET and searches it for the first
# `PD_REGISTER_ARG_MAPPING_FN(<name>` occurrence. When one is found, the line
# `PD_DECLARE_ARG_MAPPING_FN(<name>);` is appended to the file named by the
# `op_utils_header` variable, which is read from the enclosing scope rather
# than passed as an argument. A file without a registration appends nothing.
#
# string(REGEX MATCH) stops after one match, so only the first registration in
# TARGET produces a declaration.
function(append_op_util_declare TARGET)
  file(READ "${TARGET}" target_content)
  string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
               util_registrar "${target_content}")
  # Compare as a quoted string: EQUAL is the numeric operator, and an unquoted
  # empty expansion would remove the left operand from the condition.
  if(NOT "${util_registrar}" STREQUAL "")
    string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
                   util_declare "${util_registrar}")
    string(APPEND util_declare ");\n")
    file(APPEND "${op_utils_header}" "${util_declare}")
  endif()
endfunction()

# append_op_kernel_map_declare(TARGET)
#
# Reads the source file TARGET and searches it for the first
# `PD_REGISTER_BASE_KERNEL_NAME(<op>, <kernel>` occurrence. When one is found,
# the line `PD_DECLARE_BASE_KERNEL_NAME(<op>, <kernel>);` is appended to the
# file named by the `op_utils_header` variable, which is read from the
# enclosing scope rather than passed as an argument. A file without a
# registration appends nothing.
#
# string(REGEX MATCH) stops after one match, so only the first registration in
# TARGET produces a declaration.
function(append_op_kernel_map_declare TARGET)
  file(READ "${TARGET}" target_content)
  string(
    REGEX
      MATCH
      "(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
      kernel_mapping_registrar
      "${target_content}")
  # Compare as a quoted string: EQUAL is the numeric operator, and an unquoted
  # empty expansion would remove the left operand from the condition.
  if(NOT "${kernel_mapping_registrar}" STREQUAL "")
    string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
                   kernel_mapping_declare "${kernel_mapping_registrar}")
    string(APPEND kernel_mapping_declare ");\n")
    file(APPEND "${op_utils_header}" "${kernel_mapping_declare}")
  endif()
endfunction()

# register_op_utils(TARGET_NAME [EXCLUDES ...] [DEPS ...])
#
# Collects every `*_sig.cc` file under
# ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/ops_signature, runs each one
# through append_op_util_declare and append_op_kernel_map_declare, and then
# builds the library TARGET_NAME from those files with cc_library.
#
# DEPS is forwarded to cc_library. EXCLUDES is accepted by the argument parser
# but is not consulted anywhere in this function.
function(register_op_utils TARGET_NAME)
  cmake_parse_arguments(register_op_utils "" "" "EXCLUDES;DEPS" ${ARGN})

  file(GLOB sig_files
       "${PADDLE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/*_sig.cc")

  # Emit the declarations for each signature file; the same list is then used
  # unchanged as the library's sources.
  foreach(sig_file IN LISTS sig_files)
    append_op_util_declare(${sig_file})
    append_op_kernel_map_declare(${sig_file})
  endforeach()

  cc_library(
    ${TARGET_NAME}
    SRCS ${sig_files}
    DEPS ${register_op_utils_DEPS})
endfunction()
49 changes: 0 additions & 49 deletions cmake/phi.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -172,55 +172,6 @@ function(kernel_declare TARGET_LIST)
endforeach()
endfunction()

# append_op_util_declare(TARGET)
#
# Reads the source file TARGET and searches it for the first
# `PD_REGISTER_ARG_MAPPING_FN(<name>` occurrence. When one is found, the line
# `PD_DECLARE_ARG_MAPPING_FN(<name>);` is appended to the file named by the
# `op_utils_header` variable, which is read from the enclosing scope rather
# than passed as an argument. A file without a registration appends nothing.
#
# string(REGEX MATCH) stops after one match, so only the first registration in
# TARGET produces a declaration.
function(append_op_util_declare TARGET)
  file(READ "${TARGET}" target_content)
  string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
               util_registrar "${target_content}")
  # Compare as a quoted string: EQUAL is the numeric operator, and an unquoted
  # empty expansion would remove the left operand from the condition.
  if(NOT "${util_registrar}" STREQUAL "")
    string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
                   util_declare "${util_registrar}")
    string(APPEND util_declare ");\n")
    file(APPEND "${op_utils_header}" "${util_declare}")
  endif()
endfunction()

# append_op_kernel_map_declare(TARGET)
#
# Reads the source file TARGET and searches it for the first
# `PD_REGISTER_BASE_KERNEL_NAME(<op>, <kernel>` occurrence. When one is found,
# the line `PD_DECLARE_BASE_KERNEL_NAME(<op>, <kernel>);` is appended to the
# file named by the `op_utils_header` variable, which is read from the
# enclosing scope rather than passed as an argument. A file without a
# registration appends nothing.
#
# string(REGEX MATCH) stops after one match, so only the first registration in
# TARGET produces a declaration.
function(append_op_kernel_map_declare TARGET)
  file(READ "${TARGET}" target_content)
  string(
    REGEX
      MATCH
      "(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
      kernel_mapping_registrar
      "${target_content}")
  # Compare as a quoted string: EQUAL is the numeric operator, and an unquoted
  # empty expansion would remove the left operand from the condition.
  if(NOT "${kernel_mapping_registrar}" STREQUAL "")
    string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
                   kernel_mapping_declare "${kernel_mapping_registrar}")
    string(APPEND kernel_mapping_declare ");\n")
    file(APPEND "${op_utils_header}" "${kernel_mapping_declare}")
  endif()
endfunction()

# register_op_utils(TARGET_NAME [EXCLUDES ...] [DEPS ...])
#
# Collects every `*_sig.cc` file under
# ${PADDLE_SOURCE_DIR}/paddle/phi/ops/compat, runs each one through
# append_op_util_declare and append_op_kernel_map_declare, and then builds the
# library TARGET_NAME from those files with cc_library.
#
# DEPS is forwarded to cc_library. EXCLUDES is accepted by the argument parser
# but is not consulted anywhere in this function.
function(register_op_utils TARGET_NAME)
  cmake_parse_arguments(register_op_utils "" "" "EXCLUDES;DEPS" ${ARGN})

  file(GLOB sig_files "${PADDLE_SOURCE_DIR}/paddle/phi/ops/compat/*_sig.cc")

  # Emit the declarations for each signature file; the same list is then used
  # unchanged as the library's sources.
  foreach(sig_file IN LISTS sig_files)
    append_op_util_declare(${sig_file})
    append_op_kernel_map_declare(${sig_file})
  endforeach()

  cc_library(
    ${TARGET_NAME}
    SRCS ${sig_files}
    DEPS ${register_op_utils_DEPS})
endfunction()

function(prune_declaration_h)
set(kernel_list ${KERNEL_LIST})
file(STRINGS ${kernel_declare_file} kernel_registry_list)
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/isfinite_op.h"
#include "paddle/fluid/operators/ops_extra_info.h"
#include "paddle/fluid/operators/ops_signature/signatures.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
Expand All @@ -41,7 +42,6 @@ limitations under the License. */
#include "paddle/phi/core/flags.h"
#include "paddle/phi/core/kernel_context.h"
#include "paddle/phi/core/kernel_factory.h"
#include "paddle/phi/ops/compat/signatures.h"
#include "paddle/utils/flags.h"

namespace phi {
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/phi_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ KernelArgsNameMakerByOpProto::GetInputArgsNames() {
continue;
}
// If contains dispensable input, we should override the
// OpArgumentMapping method self in phi/ops/compat dir
// OpArgumentMapping method self in fluid/operators/ops_signature dir
if (in.has_dispensable() && in.dispensable()) {
continue;
}
Expand Down
17 changes: 10 additions & 7 deletions paddle/fluid/operators/generator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# phi auto cmake utils
include(phi)
include(operators)

# set yaml file path
set(op_yaml_file ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/ops.yaml)
Expand Down Expand Up @@ -115,13 +115,16 @@ set(generated_fused_op_path
set(generated_sparse_ops_path
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generated_sparse_op.cc)
set(generated_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sig.cc)
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/generated_sig.cc)
set(generated_fused_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_fused_sig.cc)
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/generated_fused_sig.cc
)
set(generated_static_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_static_sig.cc)
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/generated_static_sig.cc
)
set(generated_sparse_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sparse_sig.cc)
${CMAKE_SOURCE_DIR}/paddle/fluid/operators/ops_signature/generated_sparse_sig.cc
)
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir} RESULTS_VARIABLE
Expand Down Expand Up @@ -342,10 +345,10 @@ execute_process(
message("generate ${ops_extra_info_file}")

set(op_utils_header
${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h.tmp
${PADDLE_BINARY_DIR}/paddle/fluid/operators/ops_signature/signatures.h.tmp
CACHE INTERNAL "op_args_fns.cc file")
set(op_utils_header_final
${PADDLE_BINARY_DIR}/paddle/phi/ops/compat/signatures.h)
${PADDLE_BINARY_DIR}/paddle/fluid/operators/ops_signature/signatures.h)
file(
WRITE ${op_utils_header}
"// Generated by the paddle/fluid/operators/generator/CMakeLists.txt. DO NOT EDIT!\n\n"
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
55 changes: 50 additions & 5 deletions paddle/phi/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,33 @@ endif()
if(WITH_CUTLASS)
execute_process(
COMMAND ${CMAKE_COMMAND} -E make_directory
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/conv2d/generated"
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/conv2d/generated_tmp"
COMMAND ${PYTHON_EXECUTABLE} "conv2d_bias_act.py"
COMMAND ${PYTHON_EXECUTABLE} "conv2d_bias_residual.py"
COMMAND ${PYTHON_EXECUTABLE} "conv2d_depthwise_bias_act.py"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/conv2d")
# Sync the conv2d kernels found in generated_tmp/ into generated/.
# When generated/ already exists the files are copied with copy_if_different,
# which skips files whose content has not changed (presumably so unchanged
# kernels are not rebuilt -- confirm against the build's dependency tracking).
# Otherwise a plain copy is used.
set(generated_tmp_dir
    ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/conv2d/generated_tmp
)
set(generated_dir
    ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/conv2d/generated)
file(GLOB con2d_generated_files ${generated_tmp_dir}/*.cu)

# The existence check is made once, before any file is copied, so every file
# in this run uses the same copy mode.
if(EXISTS ${generated_dir})
  set(conv2d_copy_mode copy_if_different)
  set(conv2d_copy_note "copy if different ${generated_dir}")
else()
  set(conv2d_copy_mode copy)
  set(conv2d_copy_note "copy ${generated_dir}")
endif()

foreach(gen_file ${con2d_generated_files})
  # Build the destination from the file name. Substituting "generated_tmp" in
  # the whole absolute path would also rewrite any parent directory whose name
  # happens to contain that text.
  get_filename_component(gen_file_name "${gen_file}" NAME)
  set(now_file "${generated_dir}/${gen_file_name}")
  execute_process(COMMAND ${CMAKE_COMMAND} -E ${conv2d_copy_mode}
                          "${gen_file}" "${now_file}")
endforeach()
message("${conv2d_copy_note}")

execute_process(
COMMAND
Expand Down Expand Up @@ -145,17 +167,39 @@ if(WITH_CUTLASS)
)

execute_process(
COMMAND
${CMAKE_COMMAND} -E remove_directory
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen"
COMMAND
${CMAKE_COMMAND} -E make_directory
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen"
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp"
COMMAND ${PYTHON_EXECUTABLE} generic_mixed_gemm_kernelLauncher.py
--cuda_arch "${NVCC_ARCH_BIN}"
WORKING_DIRECTORY
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm"
)
# Sync the fpA_intB GEMM headers and kernels found in autogen_tmp/ into
# autogen/. When autogen/ already exists the files are copied with
# copy_if_different, which skips files whose content has not changed
# (presumably so unchanged kernels are not rebuilt -- confirm against the
# build's dependency tracking). Otherwise a plain copy is used.
set(fpA_intB_gemm_autogen_tmp_dir
    ${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen_tmp
)
set(fpA_intB_gemm_autogen_dir
    ${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen
)

file(GLOB fpA_intB_gemm_autogen_files ${fpA_intB_gemm_autogen_tmp_dir}/*.h
     ${fpA_intB_gemm_autogen_tmp_dir}/*.cu)

# The existence check is made once, before any file is copied, so every file
# in this run uses the same copy mode.
if(EXISTS ${fpA_intB_gemm_autogen_dir})
  set(fpA_intB_gemm_copy_mode copy_if_different)
  set(fpA_intB_gemm_copy_note "copy if different ${fpA_intB_gemm_autogen_dir}")
else()
  set(fpA_intB_gemm_copy_mode copy)
  set(fpA_intB_gemm_copy_note "copy ${fpA_intB_gemm_autogen_dir}")
endif()

foreach(gen_file ${fpA_intB_gemm_autogen_files})
  # Build the destination from the file name. Substituting "autogen_tmp" in
  # the whole absolute path would also rewrite any parent directory whose name
  # happens to contain that text.
  get_filename_component(gen_file_name "${gen_file}" NAME)
  set(now_file "${fpA_intB_gemm_autogen_dir}/${gen_file_name}")
  execute_process(COMMAND ${CMAKE_COMMAND} -E ${fpA_intB_gemm_copy_mode}
                          "${gen_file}" "${now_file}")
endforeach()
message("${fpA_intB_gemm_copy_note}")

file(
GLOB cutlass_cu
Expand All @@ -167,6 +211,7 @@ if(WITH_CUTLASS)
"fusion/cutlass/memory_efficient_attention/autogen_variable/impl/*.cu"
"fusion/cutlass/cutlass_kernels/fpA_intB_gemm/autogen/*.cu"
"fusion/cutlass/cutlass_kernels/fpA_intB_gemm/*.cu")

list(APPEND kernel_cu ${cutlass_cu})
endif()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,6 @@ def generate_sm75_1688():
sm_versions, SupportedAct, UnderScoreName, CamelName
)
all_code += CommonTail
with open("generated/conv2d_bias_act.cu", "w") as f:
with open("generated_tmp/conv2d_bias_act.cu", "w") as f:
f.write(all_code)
f.close()
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,6 @@ def generate_sm75_1688():
sm_versions, SupportedEpilogue, UnderScoreName, CamelName
)
all_code += CommonTail
with open("generated/conv2d_bias_residual.cu", "w") as f:
with open("generated_tmp/conv2d_bias_residual.cu", "w") as f:
f.write(all_code)
f.close()
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,6 @@ def generate_conv2d_depthwise():
all_code = cdba_header
all_code += generate_conv2d_depthwise()
all_code += CommonTail
with open("generated/conv2d_depthwise_bias_act.cu", "w") as f:
with open("generated_tmp/conv2d_depthwise_bias_act.cu", "w") as f:
f.write(all_code)
f.close()
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def generate_source_cu(
args = parse_args()
archs = args.cuda_arch
header_all = DefineHeader
header_name = "autogen/arch_define.h"
header_name = "autogen_tmp/arch_define.h"
if archs:
for arch in archs:
define_line = "#define USE_FPAINTB_GEMM_WITH_SM%s\n" % str(arch)
Expand All @@ -217,7 +217,7 @@ def generate_source_cu(
for arch in archs:
for epilogue_tag in EpilogueTags.keys():
for stages in StagesList[arch]:
file_name = "autogen/generic_mixed_gemm_kernelLauncher_{}_sm{}_stages{}_{}.cu".format(
file_name = "autogen_tmp/generic_mixed_gemm_kernelLauncher_{}_sm{}_stages{}_{}.cu".format(
element_type, arch, stages, epilogue_tag
)
all_code = generate_source_cu(
Expand Down
2 changes: 1 addition & 1 deletion test/cpp/phi/core/test_op_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ limitations under the License. */
#include <iostream>

#include "gtest/gtest.h"
#include "paddle/fluid/operators/ops_signature/signatures.h"
#include "paddle/phi/core/compat/op_utils.h"
#include "paddle/phi/ops/compat/signatures.h"

namespace phi {
namespace tests {
Expand Down
2 changes: 1 addition & 1 deletion test/cpp/phi/ops/test_op_signature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ limitations under the License. */
#include <memory>
#include <unordered_set>

#include "paddle/fluid/operators/ops_signature/signatures.h"
#include "paddle/phi/core/compat/op_utils.h"
#include "paddle/phi/ops/compat/signatures.h"

namespace phi {
namespace tests {
Expand Down

0 comments on commit b3d4ce3

Please sign in to comment.