@@ -149,42 +149,112 @@ endif()
149149
150150
151151
# Type-index combinations used when generating per-combination sources.
# Every entry is a comma-separated tuple of small integer indices; the order of
# the entries is preserved exactly as originally listed.
# NOTE(review): the indices presumably select SD_SINGLE_TYPE_<n> entries -
# confirm against the code that consumes these lists.

# Three-way combinations, one row per leading index.
set(COMBINATIONS_3
    "0,0,0" "0,0,2" "0,1,2" "0,2,0" "0,2,1" "0,2,2"
    "1,0,0" "1,1,1" "1,1,2" "1,2,0" "1,2,1" "1,2,2"
    "2,0,0" "2,0,1" "2,0,2" "2,1,0" "2,1,1" "2,1,2" "2,2,0" "2,2,1" "2,2,2"
)

# Two-way combinations.
set(COMBINATIONS_2
    "0,0" "0,1" "1,0" "1,1"
    "0,2" "2,0" "1,2" "2,1" "2,2"
)
# Generate one CUDA source file containing a single explicit template
# instantiation of the method declared in TEMPLATE_FILE.
#
# Arguments:
#   TEMPLATE_FILE - template to read. It must contain the marker text
#                   ITERATE_COMBINATIONS_3; everything before the marker is
#                   copied into the generated file as its header.
#   COMBINATION   - three comma-separated type indices, e.g. "0,1,2". Index N
#                   is emitted as SD_SINGLE_TYPE_N in the instantiation.
#   OUTPUT_DIR    - directory that receives the generated .cu file (created
#                   when missing).
#
# Result: the generated file path is appended to CUDA_GENERATED_SOURCES in the
# caller's scope and the file is marked as a CUDA source. The file is rewritten
# only when its content would change.
function(genSingleFunctionCuda TEMPLATE_FILE COMBINATION OUTPUT_DIR)
    # Split "a,b,c" into a list and insist on exactly three indices so that the
    # list(GET) calls below cannot fail with an obscure index error.
    string(REPLACE "," ";" COMB_LIST "${COMBINATION}")
    list(LENGTH COMB_LIST COMB_COUNT)
    if(NOT COMB_COUNT EQUAL 3)
        message(FATAL_ERROR "genSingleFunctionCuda: expected three comma-separated type indices, got '${COMBINATION}'")
    endif()
    list(GET COMB_LIST 0 COMB1)
    list(GET COMB_LIST 1 COMB2)
    list(GET COMB_LIST 2 COMB3)

    # Read the template content
    file(READ "${TEMPLATE_FILE}" TEMPLATE_CONTENT)

    # Extract class and method names: "<Class>, ::<method>"
    string(REGEX MATCH "([a-zA-Z0-9_:]+),[ \n\t]*::([a-zA-Z0-9_]+)" FUNCTION_MATCH "${TEMPLATE_CONTENT}")
    string(LENGTH "${FUNCTION_MATCH}" FUNCTION_MATCH_LENGTH)
    if(FUNCTION_MATCH_LENGTH EQUAL 0)
        message(FATAL_ERROR "genSingleFunctionCuda: could not find '<class>, ::<method>' in template file ${TEMPLATE_FILE}")
    endif()
    set(CLASS_NAME "${CMAKE_MATCH_1}")
    set(METHOD_NAME "${CMAKE_MATCH_2}")

    # Clean class name for file naming
    string(REPLACE "::" "_" CLASS_NAME_CLEAN "${CLASS_NAME}")

    # Extract the argument list of the method. A method without arguments does
    # not match (the group needs at least one character) and yields "".
    string(REGEX MATCH "::${METHOD_NAME}\\(([^;]+)\\);" FUNC_ARGS_MATCH "${TEMPLATE_CONTENT}")
    string(LENGTH "${FUNC_ARGS_MATCH}" FUNC_ARGS_MATCH_LENGTH)
    if(FUNC_ARGS_MATCH_LENGTH EQUAL 0)
        set(FUNCTION_ARGS "")
    else()
        set(FUNCTION_ARGS "${CMAKE_MATCH_1}")
    endif()

    # Build a signature identifier from the leading type token of each
    # parameter. Each parameter is stripped first: after splitting on ","
    # every parameter but the first starts with whitespace, which would
    # otherwise make the anchored regex fail and drop the parameter.
    set(SIGNATURE_ID "")
    string(REPLACE "," ";" ARGS_LIST "${FUNCTION_ARGS}")
    foreach(ARG IN LISTS ARGS_LIST)
        string(STRIP "${ARG}" ARG)
        string(REGEX MATCH "^[^*& \t]+" TYPE_NAME "${ARG}")
        # Compare by length: a bare if(TYPE_NAME) would treat type names such
        # as "N" or "OFF" as false constants.
        string(LENGTH "${TYPE_NAME}" TYPE_NAME_LENGTH)
        if(TYPE_NAME_LENGTH GREATER 0)
            string(APPEND SIGNATURE_ID "_${TYPE_NAME}")
        endif()
    endforeach()

    # Shorten long signatures to an 8-character hash. CMake regular expressions
    # have no "{n,}" quantifier, so the length is measured explicitly.
    string(LENGTH "${SIGNATURE_ID}" SIGNATURE_LENGTH)
    if(SIGNATURE_LENGTH GREATER 29)
        string(MD5 SIGNATURE_HASH "${SIGNATURE_ID}")
        string(SUBSTRING "${SIGNATURE_HASH}" 0 8 SIGNATURE_HASH)
        set(SIGNATURE_ID "_h${SIGNATURE_HASH}")
    else()
        # Keep the readable form but make it safe for file names
        # (qualified names contain ':' and template types contain '<' / '>').
        string(REGEX REPLACE "[^A-Za-z0-9_]" "_" SIGNATURE_ID "${SIGNATURE_ID}")
    endif()

    # Output filename with signature identifier
    set(OUTPUT_FILE "${CLASS_NAME_CLEAN}_${METHOD_NAME}${SIGNATURE_ID}_${COMB1}_${COMB2}_${COMB3}.cu")
    set(GENERATED_FILE "${OUTPUT_DIR}/${OUTPUT_FILE}")

    # Everything in front of the ITERATE_COMBINATIONS_3 marker is the header
    # that the single instantiation needs.
    set(START_MARKER "ITERATE_COMBINATIONS_3")
    string(FIND "${TEMPLATE_CONTENT}" "${START_MARKER}" START_POS)
    if(START_POS EQUAL -1)
        message(FATAL_ERROR "Could not find ITERATE_COMBINATIONS_3 in template file ${TEMPLATE_FILE}")
    endif()
    string(SUBSTRING "${TEMPLATE_CONTENT}" 0 ${START_POS} HEADER_CONTENT)

    # Create streamlined content with just the single instantiation needed
    set(NEW_CONTENT "${HEADER_CONTENT}\n\n// Single function instantiation for ${CLASS_NAME}::${METHOD_NAME}\n")
    string(APPEND NEW_CONTENT "template void ${CLASS_NAME}::${METHOD_NAME}<SD_SINGLE_TYPE_${COMB1}, SD_SINGLE_TYPE_${COMB2}, SD_SINGLE_TYPE_${COMB3}>(${FUNCTION_ARGS});\n")

    # Write only when the content differs: an unchanged file keeps its
    # timestamp (no needless recompilation) while a changed template still
    # refreshes the generated source.
    set(WRITE_NEEDED TRUE)
    if(EXISTS "${GENERATED_FILE}")
        file(READ "${GENERATED_FILE}" EXISTING_CONTENT)
        if("${EXISTING_CONTENT}" STREQUAL "${NEW_CONTENT}")
            set(WRITE_NEEDED FALSE)
        endif()
    endif()
    if(WRITE_NEEDED)
        file(MAKE_DIRECTORY "${OUTPUT_DIR}")
        file(WRITE "${GENERATED_FILE}" "${NEW_CONTENT}")
        message(STATUS "Generated: ${GENERATED_FILE}")
    endif()

    # Set properties and add to sources (also for files that already existed)
    set_source_files_properties("${GENERATED_FILE}" PROPERTIES LANGUAGE CUDA)
    list(APPEND CUDA_GENERATED_SOURCES "${GENERATED_FILE}")
    set(CUDA_GENERATED_SOURCES ${CUDA_GENERATED_SOURCES} PARENT_SCOPE)
endfunction()

# Generate one CUDA source per (template file, three-way combination) pair.
# This has to come after the function definition above: CMake resolves a
# command when the call is executed, so calling it earlier is an error.
foreach(TEMPLATE_FILE IN LISTS CUDA_TEMPLATE_FILES)
    foreach(COMBINATION IN LISTS COMBINATIONS_3)
        genSingleFunctionCuda("${TEMPLATE_FILE}" "${COMBINATION}" "${CMAKE_BINARY_DIR}/cuda_generated")
    endforeach()
endforeach()
188258
189259# Define genPartitionCombination Function
190260function (genPartitionCombination TEMPLATE_FILE COMBINATION_TYPE COMBINATION OUTPUT_DIR)
@@ -267,6 +337,8 @@ set(INSTANTIATION_TEMPLATES_2
267337 "${CMAKE_CURRENT_SOURCE_DIR} /loops/cpu/comb_compilation_units/pairwise_instantiation_template_2.cpp.in"
268338)
269339
340+
341+
270342# Example Usage of genPartitionCombination
271343# Define combinations for ITERATE_COMBINATIONS_3
272344set (COMBINATIONS_3
@@ -438,6 +510,56 @@ function(genCompilation FILE_ITEM)
438510 set (CUSTOMOPS_GENERIC_SOURCES ${CUSTOMOPS_GENERIC_SOURCES} PARENT_SCOPE)
439511endfunction ()
440512
513+
# Generate one CUDA source file from TEMPLATE_FILE for a single type-index
# triple by substituting the @COMB1@, @COMB2@ and @COMB3@ placeholders.
#
# Arguments:
#   TEMPLATE_FILE - template to read; its whole content is copied with the
#                   placeholders replaced.
#   COMBINATION   - three comma-separated type indices, e.g. "0,1,2".
#   OUTPUT_DIR    - directory that receives the generated .cu file (created
#                   when missing).
#
# Result: the generated file path is appended to CUDA_GENERATED_SOURCES in the
# caller's scope and the file is marked as a CUDA source.
#
# NOTE(review): a function with this same name is also defined earlier in this
# file. CMake keeps the most recent definition, so calls made after this point
# use this version - confirm which implementation is intended.
function(genSingleFunctionCuda TEMPLATE_FILE COMBINATION OUTPUT_DIR)
    # Split "a,b,c" into a list and insist on exactly three indices so that the
    # list(GET) calls below cannot fail with an obscure index error.
    string(REPLACE "," ";" COMB_LIST "${COMBINATION}")
    list(LENGTH COMB_LIST COMB_COUNT)
    if(NOT COMB_COUNT EQUAL 3)
        message(FATAL_ERROR "genSingleFunctionCuda: expected three comma-separated type indices, got '${COMBINATION}'")
    endif()
    list(GET COMB_LIST 0 COMB1)
    list(GET COMB_LIST 1 COMB2)
    list(GET COMB_LIST 2 COMB3)

    # Read the template content
    file(READ "${TEMPLATE_FILE}" TEMPLATE_CONTENT)

    # Extract class and method names: "<Class>, ::<method>". Without a match
    # the output file name would degenerate and collide between templates.
    string(REGEX MATCH "([a-zA-Z0-9_:]+),[ \n\t]*::([a-zA-Z0-9_]+)" FUNCTION_MATCH "${TEMPLATE_CONTENT}")
    string(LENGTH "${FUNCTION_MATCH}" FUNCTION_MATCH_LENGTH)
    if(FUNCTION_MATCH_LENGTH EQUAL 0)
        message(FATAL_ERROR "genSingleFunctionCuda: could not find '<class>, ::<method>' in template file ${TEMPLATE_FILE}")
    endif()
    set(CLASS_NAME "${CMAKE_MATCH_1}")
    set(METHOD_NAME "${CMAKE_MATCH_2}")

    # Clean class name for file naming
    string(REPLACE "::" "_" CLASS_NAME_CLEAN "${CLASS_NAME}")

    # Output filename
    set(OUTPUT_FILE "${CLASS_NAME_CLEAN}_${METHOD_NAME}_${COMB1}_${COMB2}_${COMB3}.cu")
    set(GENERATED_FILE "${OUTPUT_DIR}/${OUTPUT_FILE}")

    # Replace placeholders with actual values
    string(REPLACE "@COMB1@" "${COMB1}" MODIFIED_CONTENT "${TEMPLATE_CONTENT}")
    string(REPLACE "@COMB2@" "${COMB2}" MODIFIED_CONTENT "${MODIFIED_CONTENT}")
    string(REPLACE "@COMB3@" "${COMB3}" MODIFIED_CONTENT "${MODIFIED_CONTENT}")

    # Write only when the content differs, so that re-running the configure
    # step does not touch every generated file and force recompilation.
    set(WRITE_NEEDED TRUE)
    if(EXISTS "${GENERATED_FILE}")
        file(READ "${GENERATED_FILE}" EXISTING_CONTENT)
        if("${EXISTING_CONTENT}" STREQUAL "${MODIFIED_CONTENT}")
            set(WRITE_NEEDED FALSE)
        endif()
    endif()
    if(WRITE_NEEDED)
        file(MAKE_DIRECTORY "${OUTPUT_DIR}")
        file(WRITE "${GENERATED_FILE}" "${MODIFIED_CONTENT}")
        message(STATUS "Generated: ${GENERATED_FILE}")
    endif()

    # Set properties and add to sources
    set_source_files_properties("${GENERATED_FILE}" PROPERTIES LANGUAGE CUDA)
    list(APPEND CUDA_GENERATED_SOURCES "${GENERATED_FILE}")
    set(CUDA_GENERATED_SOURCES ${CUDA_GENERATED_SOURCES} PARENT_SCOPE)
endfunction()
562+
441563# MSVC runtime lib can be either "MultiThreaded" or "MultiThreadedDLL", /MT and /MD respectively
442564set (MSVC_RT_LIB "MultiThreadedDLL" )
443565
@@ -649,18 +771,23 @@ else()
649771endif ()
650772
# Flatbuffers download section.
# Configures and builds the helper project generated from CMakeLists.txt.in
# inside <binary dir>/flatbuffers-download while CMake is configuring. After
# both steps succeed a marker file is written; later configure runs that find
# the marker skip the whole section. Delete the marker file to force the steps
# to run again.
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download-complete.marker")
    configure_file(CMakeLists.txt.in flatbuffers-download/CMakeLists.txt)

    # Configure the helper project with the same generator as this build.
    # The generator name has to follow -G; passed on its own it would be taken
    # as a path argument.
    execute_process(COMMAND "${CMAKE_COMMAND}" -G "${CMAKE_GENERATOR}" .
        RESULT_VARIABLE result
        WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download")
    if(result)
        message(FATAL_ERROR "CMake step for flatbuffers failed: ${result}")
    endif()

    # Build the helper project.
    execute_process(COMMAND "${CMAKE_COMMAND}" --build .
        RESULT_VARIABLE result
        WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download")
    if(result)
        message(FATAL_ERROR "Build step for flatbuffers failed: ${result}")
    endif()

    # Record success only after both steps passed.
    file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/flatbuffers-download-complete.marker" "Download complete")
else()
    message(STATUS "Flatbuffers already downloaded, skipping download step")
endif()
665792
666793# Add flatbuffers directly to build
0 commit comments