@@ -110,7 +110,7 @@ function(kernel_library TARGET)
110110
111111 cmake_parse_arguments (kernel_library "${options} " "${oneValueArgs} "
112112 "${multiValueArgs} " ${ARGN} )
113-
113+
114114 # used for cc_library selected_rows dir target
115115 set (target_suffix "" )
116116 if ("${kernel_library_SUB_DIR} " STREQUAL "selected_rows" )
@@ -146,16 +146,11 @@ function(kernel_library TARGET)
146146 endif ()
147147 endif ()
148148 if (WITH_XPU_KP )
149- if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR} /kps/${TARGET}.cu )
150- # Change XPU2 file suffix
151- # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
152- file (COPY ${CMAKE_CURRENT_SOURCE_DIR} /kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR} /kps )
153- file (RENAME ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.kps )
154- list (APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.kps )
155- endif ()
156- if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR} /cpu/${TARGET}.cc )
157- list (APPEND kps_srcs ${CMAKE_CURRENT_SOURCE_DIR} /cpu/${TARGET}.cc )
158- endif ()
149+ # Change XPU2 file suffix
150+ # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
151+ file (COPY ${CMAKE_CURRENT_SOURCE_DIR} /kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR} /kps )
152+ file (RENAME ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.kps )
153+ list (APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR} /kps/${TARGET}.kps )
159154 endif ()
160155 else ()
161156 # TODO(chenweihang): impl compile by source later
@@ -186,7 +181,7 @@ function(kernel_library TARGET)
186181 string (REGEX MATCHALL "#include \" paddle\/ phi\/ kernels\/ ${kernel_library_SUB_DIR} \/ [a-z0-9_]+_kernel.h\" " include_kernels ${target_content} )
187182 list (APPEND all_include_kernels ${include_kernels} )
188183 endif ()
189-
184+
190185 foreach (include_kernel ${all_include_kernels} )
191186 if ("${kernel_library_SUB_DIR} " STREQUAL "" )
192187 string (REGEX REPLACE "#include \" paddle\/ phi\/ kernels\/ " "" kernel_name ${include_kernel} )
@@ -219,71 +214,76 @@ function(kernel_library TARGET)
219214 list (LENGTH kps_srcs kps_srcs_len )
220215
221216 # kernel source file level
222- # level 1: base device kernel
223- # - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
217+ # level 1: base device kernel (if any device or dnn kernel exists, the cpu kernel must exist as well)
218+ # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
219+ # - dnn srcs: gpudnn_srcs
224220 # level 2: device-independent kernel
225221 # - common_srcs
226- set (base_device_kernels )
227- set (device_independent_kernel )
228222
229- # 1. Base device kernel compile
230- if (${cpu_srcs_len} GREATER 0 )
231- cc_library (${TARGET} _cpu${target_suffix} SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
232- list (APPEND base_device_kernels ${TARGET} _cpu${target_suffix} )
233- endif ()
234- if (${gpu_srcs_len} GREATER 0 )
235- if (WITH_GPU )
236- nv_library (${TARGET} _gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
237- elseif (WITH_ROCM )
238- hip_library (${TARGET} _gpu${target_suffix} SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
239- endif ()
240- list (APPEND base_device_kernels ${TARGET} _gpu${target_suffix} )
223+ set (partial_build_flag 0 )
224+ set (base_build_flag 0 )
225+ if (${common_srcs_len} GREATER 0 )
226+ set (partial_build_flag 1 )
241227 endif ()
242- if (${xpu_srcs_len} GREATER 0 )
243- cc_library (${TARGET} _xpu${target_suffix} SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
244- list (APPEND base_device_kernels ${TARGET} _xpu${target_suffix} )
228+ if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0 )
229+ set (base_build_flag 1 )
245230 endif ()
231+
232+ # gpudnn or mkldnn needs to be compiled separately
233+ set (dnn_kernels )
246234 if (${gpudnn_srcs_len} GREATER 0 )
247235 if (WITH_GPU )
248236 nv_library (${TARGET} _gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
249237 elseif (WITH_ROCM )
250238 hip_library (${TARGET} _gpudnn${target_suffix} SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
251239 endif ()
252- list (APPEND base_device_kernels ${TARGET} _gpudnn${target_suffix} )
253- endif ()
254- if (${kps_srcs_len} GREATER 0 )
255- # only when WITH_XPU_KP, the kps_srcs_len can be > 0
256- xpu_library (${TARGET} _kps${target_suffix} SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
257- list (APPEND base_device_kernels ${TARGET} _kps${target_suffix} )
240+ list (APPEND dnn_kernels ${TARGET} _gpudnn${target_suffix} )
258241 endif ()
242+ list (LENGTH dnn_kernels dnn_kernels_len )
259243
260- # 2. Device-independent kernel compile
261- if (${common_srcs_len} GREATER 0 )
244+ if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1 )
262245 if (WITH_GPU )
263- nv_library (${TARGET} _common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} )
246+ if (${dnn_kernels_len} GREATER 0 )
247+ nv_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
248+ nv_library (${TARGET}${target_suffix} DEPS ${TARGET} _base${target_suffix} ${dnn_kernels} )
249+ else ()
250+ nv_library (${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
251+ endif ()
264252 elseif (WITH_ROCM )
265- hip_library (${TARGET} _common${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} )
253+ if (${dnn_kernels_len} GREATER 0 )
254+ hip_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
255+ hip_library (${TARGET}${target_suffix} DEPS ${TARGET} _base${target_suffix} ${dnn_kernels} )
256+ else ()
257+ hip_library (${TARGET}${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
258+ endif ()
266259 elseif (WITH_XPU_KP )
267- xpu_library (${TARGET} _common ${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps } ${base_device_kernels } )
260+ xpu_library (${TARGET}${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
268261 else ()
269- cc_library (${TARGET} _common ${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps } ${base_device_kernels } )
262+ cc_library (${TARGET}${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
270263 endif ()
271- list (APPEND device_independent_kernel ${TARGET} _common${target_suffix} )
272- endif ()
273-
274-
275- # 3. Unify target compile
276- list (LENGTH base_device_kernels base_device_kernels_len )
277- list (LENGTH device_independent_kernel device_independent_kernel_len )
278- if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 )
264+ elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1 )
265+ if (WITH_GPU )
266+ nv_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
267+ nv_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET} _base${target_suffix} ${dnn_kernels} )
268+ elseif (WITH_ROCM )
269+ hip_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
270+ hip_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET} _base${target_suffix} ${dnn_kernels} )
271+ elseif (WITH_XPU_KP )
272+ xpu_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
273+ xpu_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET} _base${target_suffix} )
274+ else ()
275+ cc_library (${TARGET} _base${target_suffix} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
276+ cc_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${TARGET} _base${target_suffix} )
277+ endif ()
278+ elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0 )
279279 if (WITH_GPU )
280- nv_library (${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels } ${device_independent_kernel } )
280+ nv_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
281281 elseif (WITH_ROCM )
282- hip_library (${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels } ${device_independent_kernel } )
282+ hip_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
283283 elseif (WITH_XPU_KP )
284- xpu_library (${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels } ${device_independent_kernel } )
284+ xpu_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
285285 else ()
286- cc_library (${TARGET}${target_suffix} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels } ${device_independent_kernel } )
286+ cc_library (${TARGET}${target_suffix} SRCS ${common_srcs} DEPS ${kernel_library_DEPS } ${kernel_deps } )
287287 endif ()
288288 else ()
289289 set (target_build_flag 0 )
0 commit comments