Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b3da13a
First stage of cutensor wrapper, only works with basic strides
SpaceyLake Oct 3, 2025
362962c
Added the use of handle
SpaceyLake Oct 3, 2025
f2ed80f
Updated bindings allowing for non-contiguous output tensor.
SpaceyLake Oct 10, 2025
933fba4
Modified to work with current CuTensor bindings
SpaceyLake Oct 10, 2025
a2d46d3
Added functionality for elemental operation on D
SpaceyLake Oct 14, 2025
00e90e5
Fixed function name
SpaceyLake Oct 22, 2025
439d5cf
Fixed precision type
SpaceyLake Oct 22, 2025
e8f86f0
Small sectioning optimization
SpaceyLake Oct 22, 2025
412f1fe
Fixed scalar for permute D
SpaceyLake Oct 22, 2025
f584e7d
Fixed sectioning
SpaceyLake Oct 22, 2025
2b2ecec
Created a demo version that loads libraries dynamically
SpaceyLake Oct 22, 2025
29230cb
Created a test version that loads libraries dynamically
SpaceyLake Oct 22, 2025
aa69f9a
Simple example of using CuTensor
SpaceyLake Oct 22, 2025
f407841
Made cuda stream a part of TAPP_executor
SpaceyLake Oct 27, 2025
4ca108b
Algorithm correction
SpaceyLake Oct 27, 2025
a917783
Added cutensor handle to TAPP_handle
SpaceyLake Oct 27, 2025
d80d06f
Corrected copying of memory
SpaceyLake Oct 27, 2025
f8e70fb
cutensor error handling
janbrandejs Nov 21, 2025
87cdea5
can compile with cmake
SpaceyLake Dec 4, 2025
3353f35
Fixed typo
SpaceyLake Dec 4, 2025
31b44ba
Added the handle to create tensor info
SpaceyLake Dec 5, 2025
0d67763
Added handle when creating tensor info in old files
SpaceyLake Jan 9, 2026
7dbaf36
Uncommented code
SpaceyLake Jan 9, 2026
81e8234
Made test use tblis instead of cutensor
SpaceyLake Jan 9, 2026
c6d6737
Added the use of attributes to decide if input is on host or device
SpaceyLake Jan 9, 2026
9f361ad
Added demo for cutensor with on device input
SpaceyLake Jan 9, 2026
2a466f3
Dynamic demo running on cutensor with attribute to telling use of hos…
SpaceyLake Jan 9, 2026
7f061fa
Updated error handling
SpaceyLake Jan 14, 2026
d701639
Updated function calls with create executor and handle as part of the…
SpaceyLake Feb 5, 2026
f6838a0
Added define statement
SpaceyLake Feb 5, 2026
c77f973
Updated include
SpaceyLake Feb 5, 2026
3698239
Creation of handle and executor now handled by TAPP
SpaceyLake Feb 5, 2026
ded6ad7
Removed TAPP_EXPORT from definitions
SpaceyLake Feb 5, 2026
6276132
Removed unnecessary includes
SpaceyLake Feb 5, 2026
7fcad82
Corrected print
SpaceyLake Feb 5, 2026
aa80808
Updated function calls for cudemo
SpaceyLake Feb 5, 2026
bff60bb
Restructured
SpaceyLake Feb 5, 2026
6269b5f
Updated to follow the new "normal" test
SpaceyLake Feb 5, 2026
49a395a
Updated cmake to work with the new changes
SpaceyLake Feb 5, 2026
cfbb6d4
Updated cmake to not require cuda
SpaceyLake Feb 5, 2026
f21e618
Moved the adding of test
SpaceyLake Feb 5, 2026
ada644c
Attempt to use cuda in tests
SpaceyLake Feb 5, 2026
c06e280
Fixed missed D
SpaceyLake Feb 5, 2026
1f2671b
Attempt to fix "CMAKE_C_COMPILER not set, after EnableLanguage"
SpaceyLake Feb 5, 2026
2ea7cd1
improve cutensor lib discovery inc. conda install, allow custom tblis…
jurajHasik Feb 7, 2026
a157683
Merge branch 'cutensor_bindings' into cmake_extension
SpaceyLake Feb 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ jobs:
- os: ubuntu-24.04
cc: /usr/bin/gcc-14
cxx: /usr/bin/g++-14
cuda: true
sanitize_flags: -fsanitize=address -fsanitize=leak -fsanitize=undefined -fno-omit-frame-pointer -fno-var-tracking
- os: macos-14
cc: clang
cxx: clang++
cuda: false
sanitize_flags: -fsanitize=address -fsanitize=undefined -fno-omit-frame-pointer -fno-var-tracking

name: "${{ matrix.valgrind && 'Valgrind' || matrix.sanitize && 'Sanitizers' || '' }} ${{ matrix.os }}: ${{ matrix.cxx }} ${{ matrix.build_type }}"
Expand All @@ -52,6 +54,8 @@ jobs:
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-DCMAKE_UNITY_BUILD=${{ matrix.build_type == 'Debug' || matrix.valgrind }}
-DTAPP_REFERENCE_ENABLE_TBLIS=ON
-DTAPP_REFERENCE_BUILD_CUTENSOR_BINDINGS=${{ matrix.cuda && 'ON' || 'OFF' }}

steps:
- uses: actions/checkout@v4

Expand Down Expand Up @@ -98,6 +102,17 @@ jobs:
run: |
sudo apt-get update
sudo apt-get install ninja-build g++-14 liblapack-dev ccache valgrind

# Installs the CUDA toolkit from the distribution package repository.
# Runs only for matrix entries that set `cuda: true`.
- name: Install prerequisites CUDA Toolkit (Ubuntu only)
if: ${{ matrix.cuda }}
run: |
sudo apt-get install -y nvidia-cuda-toolkit

# Appends CUDAHOSTCXX (set to the matrix entry's C++ compiler) to $GITHUB_ENV so
# that it is exported to the steps that follow. CMake reads CUDAHOSTCXX to pick
# the host compiler used when compiling CUDA code.
# Runs only for matrix entries that set `cuda: true`.
- name: Set CUDA host compiler
if: ${{ matrix.cuda }}
run: |
echo "CUDAHOSTCXX=${{ matrix.cxx }}" >> $GITHUB_ENV

- name: Prepare ccache timestamp
id: ccache_cache_timestamp
shell: cmake -P {0}
Expand Down
60 changes: 48 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,30 @@ if(TAPP_REFERENCE_ENABLE_TBLIS)
endif()

set(TBLIS_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/tblis)

# Optional path to an existing tblis checkout. When left empty, tblis is
# fetched from GitHub at the pinned commit below.
set(TAPP_REFERENCE_TBLIS_SOURCE_DIR "" CACHE PATH "Path to existing tblis source directory (if empty, will default to fetching from GitHub)")

if(TAPP_REFERENCE_TBLIS_SOURCE_DIR)
  # Use the user-provided tblis source directory; fail early if it does not
  # contain a top-level CMakeLists.txt.
  if(NOT EXISTS "${TAPP_REFERENCE_TBLIS_SOURCE_DIR}/CMakeLists.txt")
    message(FATAL_ERROR "TAPP_REFERENCE_TBLIS_SOURCE_DIR is set to '${TAPP_REFERENCE_TBLIS_SOURCE_DIR}' but no CMakeLists.txt found there")
  endif()
  message(STATUS "Using tblis from: ${TAPP_REFERENCE_TBLIS_SOURCE_DIR}")
  # The binary directory is given explicitly because the source directory may
  # lie outside this project's source tree. Paths are quoted so that each one
  # is always passed as a single argument.
  add_subdirectory(
    "${TAPP_REFERENCE_TBLIS_SOURCE_DIR}"
    "${CMAKE_CURRENT_BINARY_DIR}/_deps/tblis-build"
  )
else()
  # Fetch tblis from GitHub. The declaration and the MakeAvailable call appear
  # exactly once; the duplicated copies left over from the diff are removed.
  include(FetchContent)

  FetchContent_Declare(
    tblis
    GIT_REPOSITORY https://github.com/devinamatthews/tblis.git
    GIT_TAG 9b95712
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/_deps/tblis
    UPDATE_DISCONNECTED TRUE
  )

  FetchContent_MakeAvailable(tblis)
endif()

target_compile_definitions(tapp-reference PRIVATE TAPP_REFERENCE_ENABLE_TBLIS=1)

Expand Down Expand Up @@ -175,7 +187,8 @@ if (TAPP_REFERENCE_BUILD_CUTENSOR_BINDINGS)

# Installation prefix searched for cuTENSOR. It is declared as a cache entry so
# that a value supplied by the user (e.g. -DCUTENSOR_ROOT=/opt/cutensor) is kept
# instead of being overwritten; /usr/local/cutensor remains the default.
set(CUTENSOR_ROOT "/usr/local/cutensor" CACHE PATH "cuTENSOR installation prefix")
set(CUTENSOR_INCLUDE_DIR "${CUTENSOR_ROOT}/include")

# Candidate library directories: <root>/lib itself plus every entry below it
# whose name starts with a digit (e.g. lib/11, lib/12), so the list is not tied
# to one hard-coded version sub-directory.
file(GLOB CUTENSOR_VERSIONED_DIRS "${CUTENSOR_ROOT}/lib/[0-9]*")
set(CUTENSOR_LIBRARY_DIR "${CUTENSOR_ROOT}/lib" ${CUTENSOR_VERSIONED_DIRS})

find_library(
CUTENSOR_LIB
Expand All @@ -185,9 +198,18 @@ if (TAPP_REFERENCE_BUILD_CUTENSOR_BINDINGS)

# Abort configuration when find_library() did not locate cuTENSOR.
if(NOT CUTENSOR_LIB)
  message(FATAL_ERROR "cuTENSOR library not found. Set CUTENSOR_ROOT correctly.")
endif()

# Re-derive the layout from the library that was actually found:
#   library dir -> directory containing the library file, with a trailing
#                  purely numeric component (e.g. ".../lib/12") stripped
#   root        -> parent of the library dir
#   include dir -> <root>/include
get_filename_component(CUTENSOR_LIBRARY_DIR "${CUTENSOR_LIB}" DIRECTORY)
if(CUTENSOR_LIBRARY_DIR MATCHES "/[0-9]+$")
  get_filename_component(CUTENSOR_LIBRARY_DIR "${CUTENSOR_LIBRARY_DIR}" DIRECTORY)
endif()
get_filename_component(CUTENSOR_ROOT "${CUTENSOR_LIBRARY_DIR}" DIRECTORY)
set(CUTENSOR_INCLUDE_DIR "${CUTENSOR_ROOT}/include")

message(STATUS "Found cuTENSOR: ${CUTENSOR_LIB}")
message(STATUS "cuTENSOR include dir: ${CUTENSOR_INCLUDE_DIR}")

add_library(cutensor_binds SHARED)

Expand Down Expand Up @@ -335,6 +357,8 @@ if(BUILD_TESTING)
tapp-reference-cutensor-demo
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/test
PRIVATE
${CUTENSOR_INCLUDE_DIR}
)

add_test(
Expand Down Expand Up @@ -366,6 +390,12 @@ if(BUILD_TESTING)
NAME tapp-reference-demo-dynamic
COMMAND $<TARGET_FILE:tapp-reference-demo-dynamic>
)

# Link the platform's dynamic-loading library (CMAKE_DL_LIBS) into the demo.
target_link_libraries(tapp-reference-demo-dynamic PRIVATE ${CMAKE_DL_LIBS})

# ----------------------------------------------------------------------------
# test using dynamic library
Expand All @@ -390,6 +420,12 @@ if(BUILD_TESTING)
NAME tapp-reference-test-dynamic
COMMAND $<TARGET_FILE:tapp-reference-test-dynamic>
)

# Link the platform's dynamic-loading library (CMAKE_DL_LIBS) into the test.
target_link_libraries(tapp-reference-test-dynamic PRIVATE ${CMAKE_DL_LIBS})

endif()

Expand Down
Loading