Skip to content

Commit ce64118

Browse files
authored
Fix cuda symbol errors + minor cuda refactoring (deeplearning4j#10196)
* CUDA refactoring:
  - Integrates device-based shape buffers for direct shape tries.
  - Updates CMakeLists to ignore helpers/cpu when compiling CUDA.
  - Removes Aurora checks from the nd4j backend.
* Fixes linkage errors with undefined symbols in CUDA.
1 parent 2f8794e commit ce64118

File tree

40 files changed

+1030
-3442
lines changed

40 files changed

+1030
-3442
lines changed

libnd4j/CMakeLists.txt

Lines changed: 174 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -149,42 +149,112 @@ endif()
149149

150150

151151

152-
# Define Combinations
153-
set(COMBINATIONS_3
154-
"0,0,0"
155-
"0,0,2"
156-
"0,1,2"
157-
"0,2,0"
158-
"0,2,1"
159-
"0,2,2"
160-
"1,0,0"
161-
"1,1,1"
162-
"1,1,2"
163-
"1,2,0"
164-
"1,2,1"
165-
"1,2,2"
166-
"2,0,0"
167-
"2,0,1"
168-
"2,0,2"
169-
"2,1,0"
170-
"2,1,1"
171-
"2,1,2"
172-
"2,2,0"
173-
"2,2,1"
174-
"2,2,2"
175-
)
176-
set(COMBINATIONS_2
177-
"0,0"
178-
"0,1"
179-
"1,0"
180-
"1,1"
181-
"0,2"
182-
"2,0"
183-
"1,2"
184-
"2,1"
185-
"2,2"
186-
)
152+
# For every file in CUDA_TEMPLATE_FILES and every entry of COMBINATIONS_3,
# call genSingleFunctionCuda with "${CMAKE_BINARY_DIR}/cuda_generated" as the
# output directory.
# NOTE(review): in this view the loop sits above the genSingleFunctionCuda
# definitions and at the spot where the COMBINATIONS_3 list used to be defined.
# Confirm that both the function and the list are available when this runs;
# if either list is empty the loop body never executes.
153+
foreach(TEMPLATE_FILE ${CUDA_TEMPLATE_FILES})
154+
foreach(COMBINATION ${COMBINATIONS_3})
155+
genSingleFunctionCuda(${TEMPLATE_FILE} ${COMBINATION} "${CMAKE_BINARY_DIR}/cuda_generated")
156+
endforeach()
157+
endforeach()
158+
187159

160+
# Define a function to generate individual CUDA files for each type combination
161+
# Placeholder stub carried over from the patch instructions; it is redefined by the full implementation further below.
162+
# Placeholder definition of genSingleFunctionCuda(TEMPLATE_FILE COMBINATION OUTPUT_DIR).
# The body contains no commands, so calling this definition does nothing.
# NOTE(review): the same function name is defined again right after this stub;
# presumably the stub is a leftover and can be removed - confirm.
function(genSingleFunctionCuda TEMPLATE_FILE COMBINATION OUTPUT_DIR)
163+
# (intentionally empty - no commands in this placeholder body)
164+
endfunction()
165+
166+
# Full implementation of genSingleFunctionCuda (supersedes the empty placeholder above):
167+
168+
# genSingleFunctionCuda(TEMPLATE_FILE COMBINATION OUTPUT_DIR)
#
# Writes one .cu file that holds a single explicit instantiation of the
# templated method named in TEMPLATE_FILE, for one three-type combination.
#
#   TEMPLATE_FILE  Template source. It must contain a "<Class>, ::<method>"
#                  declaration and the marker ITERATE_COMBINATIONS_3; all text
#                  before the marker is copied verbatim into the output.
#   COMBINATION    Three comma-separated type indices, e.g. "0,1,2".
#   OUTPUT_DIR     Directory that receives the generated file (created on
#                  demand).
#
# The generated path is appended to CUDA_GENERATED_SOURCES in the caller's
# scope. Configuration stops with FATAL_ERROR when COMBINATION does not have
# exactly three parts, when the class/method cannot be parsed, or when the
# ITERATE_COMBINATIONS_3 marker is missing.
function(genSingleFunctionCuda TEMPLATE_FILE COMBINATION OUTPUT_DIR)
    # "a,b,c" -> list of exactly three type indices.
    string(REPLACE "," ";" COMB_LIST "${COMBINATION}")
    list(LENGTH COMB_LIST COMB_COUNT)
    if(NOT COMB_COUNT EQUAL 3)
        message(FATAL_ERROR "genSingleFunctionCuda: expected 3 comma-separated values in COMBINATION, got \"${COMBINATION}\"")
    endif()
    list(GET COMB_LIST 0 COMB1)
    list(GET COMB_LIST 1 COMB2)
    list(GET COMB_LIST 2 COMB3)

    file(READ "${TEMPLATE_FILE}" TEMPLATE_CONTENT)

    # Class and method come from the first "<Class>, ::<method>" occurrence.
    # Fail early instead of generating a file with empty names.
    string(REGEX MATCH "([a-zA-Z0-9_:]+),[ \n\t]*::([a-zA-Z0-9_]+)" FUNCTION_MATCH "${TEMPLATE_CONTENT}")
    if("${FUNCTION_MATCH}" STREQUAL "")
        message(FATAL_ERROR "Could not find a \"<class>, ::<method>\" declaration in template file ${TEMPLATE_FILE}")
    endif()
    set(CLASS_NAME "${CMAKE_MATCH_1}")
    set(METHOD_NAME "${CMAKE_MATCH_2}")

    # "::" is not usable in a file name, so flatten it.
    string(REPLACE "::" "_" CLASS_NAME_CLEAN "${CLASS_NAME}")

    # Parameter list text between "::<method>(" and ");". Left empty when the
    # template has no such text.
    string(REGEX MATCH "::${METHOD_NAME}\\(([^;]+)\\);" FUNC_ARGS_MATCH "${TEMPLATE_CONTENT}")
    if("${FUNC_ARGS_MATCH}" STREQUAL "")
        set(FUNCTION_ARGS "")
    else()
        set(FUNCTION_ARGS "${CMAKE_MATCH_1}")
    endif()

    # Build a signature tag from the leading token of every parameter so that
    # overloads of the same method land in different files. Each parameter is
    # trimmed first (text after a comma starts with whitespace) and reduced to
    # file-name-safe characters.
    set(SIGNATURE_ID "")
    string(REPLACE "," ";" ARGS_LIST "${FUNCTION_ARGS}")
    foreach(ARG IN LISTS ARGS_LIST)
        string(STRIP "${ARG}" ARG_TRIMMED)
        string(REGEX MATCH "^[^*& \t\r\n]+" TYPE_NAME "${ARG_TRIMMED}")
        string(REGEX REPLACE "[^a-zA-Z0-9_]" "_" TYPE_NAME "${TYPE_NAME}")
        if(NOT "${TYPE_NAME}" STREQUAL "")
            string(APPEND SIGNATURE_ID "_${TYPE_NAME}")
        endif()
    endforeach()

    # Long tags are replaced by a short hash. The length is measured
    # explicitly because CMake regular expressions have no "{n,}" quantifier.
    string(LENGTH "${SIGNATURE_ID}" SIGNATURE_LEN)
    if(SIGNATURE_LEN GREATER 29)
        string(MD5 SIGNATURE_HASH "${SIGNATURE_ID}")
        string(SUBSTRING "${SIGNATURE_HASH}" 0 8 SIGNATURE_HASH)
        set(SIGNATURE_ID "_h${SIGNATURE_HASH}")
    endif()

    set(OUTPUT_FILE "${CLASS_NAME_CLEAN}_${METHOD_NAME}${SIGNATURE_ID}_${COMB1}_${COMB2}_${COMB3}.cu")
    set(GENERATED_FILE "${OUTPUT_DIR}/${OUTPUT_FILE}")

    # Everything in front of the ITERATE_COMBINATIONS_3 marker is the header
    # that the generated file keeps.
    string(FIND "${TEMPLATE_CONTENT}" "ITERATE_COMBINATIONS_3" START_POS)
    if(START_POS EQUAL -1)
        message(FATAL_ERROR "Could not find ITERATE_COMBINATIONS_3 in template file ${TEMPLATE_FILE}")
    endif()
    string(SUBSTRING "${TEMPLATE_CONTENT}" 0 ${START_POS} HEADER_CONTENT)

    set(NEW_CONTENT "${HEADER_CONTENT}\n\n// Single function instantiation for ${CLASS_NAME}::${METHOD_NAME}\n")
    string(APPEND NEW_CONTENT "template void ${CLASS_NAME}::${METHOD_NAME}<SD_SINGLE_TYPE_${COMB1}, SD_SINGLE_TYPE_${COMB2}, SD_SINGLE_TYPE_${COMB3}>(${FUNCTION_ARGS});\n")

    # Rewrite the file only when its content would change: an unchanged file
    # keeps its timestamp, while an edited template still refreshes the output.
    set(WRITE_NEEDED TRUE)
    if(EXISTS "${GENERATED_FILE}")
        file(READ "${GENERATED_FILE}" EXISTING_CONTENT)
        if("${EXISTING_CONTENT}" STREQUAL "${NEW_CONTENT}")
            set(WRITE_NEEDED FALSE)
        endif()
    endif()
    if(WRITE_NEEDED)
        file(MAKE_DIRECTORY "${OUTPUT_DIR}")
        file(WRITE "${GENERATED_FILE}" "${NEW_CONTENT}")
        message(STATUS "Generated: ${GENERATED_FILE}")
    endif()

    # The source property is applied on every configure run, including runs
    # that reuse an existing file.
    set_source_files_properties("${GENERATED_FILE}" PROPERTIES LANGUAGE CUDA)
    list(APPEND CUDA_GENERATED_SOURCES "${GENERATED_FILE}")
    set(CUDA_GENERATED_SOURCES ${CUDA_GENERATED_SOURCES} PARENT_SCOPE)
endfunction()
188258

189259
# Define genPartitionCombination Function
190260
function(genPartitionCombination TEMPLATE_FILE COMBINATION_TYPE COMBINATION OUTPUT_DIR)
@@ -267,6 +337,8 @@ set(INSTANTIATION_TEMPLATES_2
267337
"${CMAKE_CURRENT_SOURCE_DIR}/loops/cpu/comb_compilation_units/pairwise_instantiation_template_2.cpp.in"
268338
)
269339

340+
341+
270342
# Example Usage of genPartitionCombination
271343
# Define combinations for ITERATE_COMBINATIONS_3
272344
set(COMBINATIONS_3
@@ -438,6 +510,56 @@ function(genCompilation FILE_ITEM)
438510
set(CUSTOMOPS_GENERIC_SOURCES ${CUSTOMOPS_GENERIC_SOURCES} PARENT_SCOPE)
439511
endfunction()
440512

513+
514+
# genSingleFunctionCuda(TEMPLATE_FILE COMBINATION OUTPUT_DIR)
#
# Generates one CUDA source file per type combination by copying
# TEMPLATE_FILE and substituting the placeholders @COMB1@, @COMB2@ and @COMB3@
# with the three values of COMBINATION.
#
#   TEMPLATE_FILE  Template source. It must contain a "<Class>, ::<method>"
#                  declaration, which is used to name the output file.
#   COMBINATION    Three comma-separated type indices, e.g. "0,1,2".
#   OUTPUT_DIR     Directory that receives the generated file (created on
#                  demand).
#
# The generated path is appended to CUDA_GENERATED_SOURCES in the caller's
# scope. Configuration stops with FATAL_ERROR when COMBINATION does not have
# exactly three parts or when the class/method cannot be parsed.
#
# NOTE(review): genSingleFunctionCuda is also defined earlier in this file;
# presumably this later definition is the one used by calls that follow it -
# confirm which variant is intended and drop the other.
function(genSingleFunctionCuda TEMPLATE_FILE COMBINATION OUTPUT_DIR)
    # "a,b,c" -> list of exactly three type indices.
    string(REPLACE "," ";" COMB_LIST "${COMBINATION}")
    list(LENGTH COMB_LIST COMB_COUNT)
    if(NOT COMB_COUNT EQUAL 3)
        message(FATAL_ERROR "genSingleFunctionCuda: expected 3 comma-separated values in COMBINATION, got \"${COMBINATION}\"")
    endif()
    list(GET COMB_LIST 0 COMB1)
    list(GET COMB_LIST 1 COMB2)
    list(GET COMB_LIST 2 COMB3)

    file(READ "${TEMPLATE_FILE}" TEMPLATE_CONTENT)

    # Class and method come from the first "<Class>, ::<method>" occurrence.
    # Fail early instead of producing a file named after empty strings.
    string(REGEX MATCH "([a-zA-Z0-9_:]+),[ \n\t]*::([a-zA-Z0-9_]+)" FUNCTION_MATCH "${TEMPLATE_CONTENT}")
    if("${FUNCTION_MATCH}" STREQUAL "")
        message(FATAL_ERROR "Could not find a \"<class>, ::<method>\" declaration in template file ${TEMPLATE_FILE}")
    endif()
    set(CLASS_NAME "${CMAKE_MATCH_1}")
    set(METHOD_NAME "${CMAKE_MATCH_2}")

    # "::" is not usable in a file name, so flatten it.
    string(REPLACE "::" "_" CLASS_NAME_CLEAN "${CLASS_NAME}")

    set(OUTPUT_FILE "${CLASS_NAME_CLEAN}_${METHOD_NAME}_${COMB1}_${COMB2}_${COMB3}.cu")
    set(GENERATED_FILE "${OUTPUT_DIR}/${OUTPUT_FILE}")

    # Substitute the three placeholders with the concrete type indices.
    string(REPLACE "@COMB1@" "${COMB1}" MODIFIED_CONTENT "${TEMPLATE_CONTENT}")
    string(REPLACE "@COMB2@" "${COMB2}" MODIFIED_CONTENT "${MODIFIED_CONTENT}")
    string(REPLACE "@COMB3@" "${COMB3}" MODIFIED_CONTENT "${MODIFIED_CONTENT}")

    # Rewrite the file only when its content would change, so an unchanged
    # file keeps its timestamp across configure runs.
    set(WRITE_NEEDED TRUE)
    if(EXISTS "${GENERATED_FILE}")
        file(READ "${GENERATED_FILE}" EXISTING_CONTENT)
        if("${EXISTING_CONTENT}" STREQUAL "${MODIFIED_CONTENT}")
            set(WRITE_NEEDED FALSE)
        endif()
    endif()
    if(WRITE_NEEDED)
        file(MAKE_DIRECTORY "${OUTPUT_DIR}")
        file(WRITE "${GENERATED_FILE}" "${MODIFIED_CONTENT}")
        message(STATUS "Generated: ${GENERATED_FILE}")
    endif()

    # Mark the file as CUDA and hand it back to the caller's source list.
    set_source_files_properties("${GENERATED_FILE}" PROPERTIES LANGUAGE CUDA)
    list(APPEND CUDA_GENERATED_SOURCES "${GENERATED_FILE}")
    set(CUDA_GENERATED_SOURCES ${CUDA_GENERATED_SOURCES} PARENT_SCOPE)
endfunction()
562+
441563
# MSVC runtime lib can be either "MultiThreaded" or "MultiThreadedDLL", /MT and /MD respectively
442564
set(MSVC_RT_LIB "MultiThreadedDLL")
443565

@@ -649,18 +771,23 @@ else()
649771
endif()
650772

651773
# Existing flatbuffers download section
652-
configure_file(CMakeLists.txt.in flatbuffers-download/CMakeLists.txt)
653-
execute_process(COMMAND ${CMAKE_COMMAND} "${CMAKE_GENERATOR}" .
654-
RESULT_VARIABLE result
655-
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download )
656-
if(result)
657-
message(FATAL_ERROR "CMake step for flatbuffers failed: ${result}")
658-
endif()
659-
execute_process(COMMAND ${CMAKE_COMMAND} --build .
660-
RESULT_VARIABLE result
661-
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download )
662-
if(result)
663-
message(FATAL_ERROR "Build step for flatbuffers failed: ${result}")
774+
# Fetch and build flatbuffers at configure time, at most once per build tree.
# A marker file in the binary directory records a finished run; while it
# exists, later configure passes skip the helper project entirely.
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download-complete.marker")
    message(STATUS "Flatbuffers already downloaded, skipping download step")
else()
    # Instantiate the helper project from its template.
    configure_file(CMakeLists.txt.in flatbuffers-download/CMakeLists.txt)

    # Configure the helper project.
    # NOTE(review): the generator name is passed as a bare positional argument
    # rather than through -G - confirm this is intended.
    execute_process(
        COMMAND ${CMAKE_COMMAND} "${CMAKE_GENERATOR}" .
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download
        RESULT_VARIABLE result)
    if(result)
        message(FATAL_ERROR "CMake step for flatbuffers failed: ${result}")
    endif()

    # Build the helper project.
    execute_process(
        COMMAND ${CMAKE_COMMAND} --build .
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download
        RESULT_VARIABLE result)
    if(result)
        message(FATAL_ERROR "Build step for flatbuffers failed: ${result}")
    endif()

    # Both steps succeeded: leave the marker so the next configure skips them.
    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download-complete.marker" "Download complete")
endif()
665792

666793
# Add flatbuffers directly to build

libnd4j/CMakeLists.txt.in

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 2.8.2)
22

33
project(flatbuffers-download NONE)
44

5-
65
include(ExternalProject)
76
ExternalProject_Add(flatbuffers
87
GIT_REPOSITORY https://github.com/google/flatbuffers/
@@ -12,10 +11,8 @@ ExternalProject_Add(flatbuffers
1211
CMAKE_ARGS
1312
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
1413
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
15-
-DFLATBUFFERS_BUILD_FLATC=ON
14+
-DFLATBUFFERS_BUILD_FLATC=${FLATBUFFERS_BUILD_FLATC}
1615
-DCMAKE_BUILD_TYPE=Release
17-
DOWNLOAD_COMMAND ${CMAKE_COMMAND} -E rm -rf ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-src COMMAND git clone https://github.com/google/flatbuffers/ ${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-src
18-
1916
UPDATE_COMMAND ""
2017
INSTALL_COMMAND ""
2118
TEST_COMMAND ""

0 commit comments

Comments
 (0)