Merged

Dev #74

Changes from all commits
64 commits
bd6ad6b
feat(hardware): implement the MLU hardware functions; change the build to decide what gets compiled from directories named after each hardware target
kilinchange Jan 3, 2024
02939c9
feat: add the MLU build environment to CMake
kilinchange Jan 4, 2024
7f82d74
fix: drop the cncl build
kilinchange Jan 5, 2024
eb4fc4c
fix(frontend): fix getting inputs/outputs and memory sizes; add a compiler interface for setting inputs
PanZezhong1725 Jan 17, 2024
54ffa1a
Merge pull request #77 from InfiniTensor/front_fix
YdrMaster Jan 17, 2024
e76c1fc
add header file
zhangyue207 Jan 17, 2024
8b6333d
Merge pull request #79 from InfiniTensor/add-headfile
YdrMaster Jan 17, 2024
a7ed032
feat: add HardSigmoid cpu/cuda kernel
bitzyz Jan 16, 2024
3998833
feat: add hardswish cpu/cuda kernel
bitzyz Jan 11, 2024
1ce6d81
fix: fix mod kernel
kilinchange Jan 11, 2024
9e4789c
Merge pull request #78 from InfiniTensor/dev-hardsigmoid
YdrMaster Jan 18, 2024
afdfe93
style: tidy up the hardswish computation
YdrMaster Jan 18, 2024
a163eee
Merge pull request #73 from InfiniTensor/dev-hardswish
YdrMaster Jan 18, 2024
ec39fd7
feat(python_ffi): add a string-typed trace format
YdrMaster Jan 18, 2024
20788a3
fix(kernel): binary operations cannot have their operands swapped
YdrMaster Jan 18, 2024
41036a0
refactor(kernel): add a global device-memory cache to avoid repeated h2d copies
YdrMaster Jan 23, 2024
1375be3
refactor(python_ffi): let the frontend control whether operator types get the `onnx::` prefix, to allow custom operators
YdrMaster Jan 23, 2024
88e0712
build: add an llm subproject for frontend large-model custom operators
YdrMaster Jan 23, 2024
39538e7
docs: add the rms normalization operator definition
YdrMaster Jan 23, 2024
594e06b
docs: add an initial attention operator definition
YdrMaster Jan 24, 2024
4f9d09b
feat(kernel): add max cpu kernel
kilinchange Jan 24, 2024
5e13361
feat(kernel): add test for max/min cpu kernel
kilinchange Jan 25, 2024
6877516
refactor(frontend): turn Attributes into a class, and lift broadcasting and similar code into frontend so different operator libraries can share it
YdrMaster Jan 25, 2024
9dce4b3
feat(llm): add a MatMul that supports both transpose and broadcasting
YdrMaster Jan 25, 2024
eca56d8
feat(llm): add the frontend operator, graph operator, and cpu kernel for rms normalization
YdrMaster Jan 26, 2024
e15f28f
feat(llm): add the cuda kernel for rms normalization
YdrMaster Jan 26, 2024
39785bd
todo(llm): How can I use cub in NVRTC?
YdrMaster Jan 26, 2024
5f518a3
fix(kernel): correct the SliceInfo optimization computation
YdrMaster Jan 26, 2024
c462d7d
fix(kernel): bring in the latest cccl so nvrtc can be used together with cub
YdrMaster Jan 26, 2024
605c4c0
build: update backward-cpp to its master branch
YdrMaster Jan 26, 2024
0ebf34c
fix: format CMakeLists.txt
Chamberlain0w0 Jan 26, 2024
cc19556
feat(hardware): add cnnl
kilinchange Jan 26, 2024
d8a2dea
Merge branch 'dev' into dev-mlu-runtime
kilinchange Jan 26, 2024
f344ae8
feat: support pad operator | cpu/cuda kernel
bitzyz Jan 19, 2024
777d9c8
feat: optimize the Pad operator
bitzyz Jan 26, 2024
b3b7d09
fix(hardware): fix file encoding bug and include bug
kilinchange Jan 26, 2024
d91d89c
Merge pull request #87 from InfiniTensor/dev-mlu-runtime
YdrMaster Jan 26, 2024
85a8004
fix(kernel): fix the pad/slice diminfo; remove some comments
bitzyz Jan 26, 2024
0e3c71d
Merge pull request #86 from InfiniTensor/dev-pad
YdrMaster Jan 26, 2024
817cc8f
fix: fix for PRs
YdrMaster Jan 26, 2024
5f54011
fix(kernel): fix rms norm to support arbitrarily long BlockReduce
YdrMaster Jan 26, 2024
add61cb
feat(llm): register rms normalization
YdrMaster Jan 29, 2024
daeac28
feat: build out the basic structure of Attention at each layer
YdrMaster Jan 29, 2024
722e6d9
feat(llm): implement attention shape inference
YdrMaster Jan 29, 2024
12e264a
fix(hardware): typo
YdrMaster Jan 29, 2024
8595be4
refactor(kernel): refactor the transpose info computation
YdrMaster Jan 29, 2024
f4d063a
refactor(kernel): refactor the construction of transpose info and its kernel
YdrMaster Jan 29, 2024
bbed31e
refactor(kernel): support merging contiguous memory accesses in transpose
YdrMaster Jan 29, 2024
f1faf3e
feat: add the attention cuda implementation
YdrMaster Jan 29, 2024
9495516
build(kernel): add cublasLt
YdrMaster Jan 30, 2024
6630866
fix(kernel): support negative indices values for Gather
YdrMaster Jan 30, 2024
e237349
add max/min cuda kernel and test
kilinchange Jan 29, 2024
35dc6c8
Merge pull request #84 from InfiniTensor/add_max_min_kernel
YdrMaster Jan 31, 2024
27a8ad6
fix(frontend): filling in edge info must not accept missing input edges
YdrMaster Jan 31, 2024
eea2249
fix(frontend): adapt the frontend for torchvision models so that it calls the cpu kernels
bitzyz Jan 30, 2024
3f5b0ee
fix(frontend): fix the reduce operator's opset special-case bug for reducesum
bitzyz Jan 31, 2024
7db54c1
fix(kernel): fix a bug in transpose info
bitzyz Jan 31, 2024
b3e89a6
fix(kernel): handle more corner cases in TransposeInfo and add a unit test
YdrMaster Jan 31, 2024
d076c20
Merge pull request #88 from InfiniTensor/fix-torchvision-models
YdrMaster Feb 2, 2024
3199eb5
feat(kernel): add the exp operator
bitzyz Feb 4, 2024
e3febd9
fix(kernel): unreferenced output tensors must still be allocated, but are freed immediately after computation
YdrMaster Feb 19, 2024
16870d6
feat(dist): integrate the nccl communication library; allreduce operator
PanZezhong1725 Feb 19, 2024
34ed834
Merge pull request #92 from InfiniTensor/dist-merge
YdrMaster Feb 19, 2024
3cb6f5d
Merge pull request #91 from InfiniTensor/dev-exp
YdrMaster Feb 19, 2024
3 changes: 3 additions & 0 deletions .gitmodules
@@ -19,3 +19,6 @@
[submodule "src/09python_ffi/pybind11"]
path = src/09python_ffi/pybind11
url = git@github.com:pybind/pybind11.git
[submodule "3rd-party/cccl"]
path = 3rd-party/cccl
url = git@github.com:NVIDIA/cccl.git
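Note: a freshly added submodule has to be fetched before the build can see it; with the entry registered as above, `git submodule update --init 3rd-party/cccl` (or `--init --recursive` for all submodules) does that.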
2 changes: 1 addition & 1 deletion 3rd-party/backward-cpp
1 change: 1 addition & 0 deletions 3rd-party/cccl
Submodule cccl added at b7d422
34 changes: 34 additions & 0 deletions CMakeLists.txt
@@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
option(USE_CUDA "Support Nvidia GPU" OFF)
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
option(USE_BANG "Support Cambricon MLU" OFF)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -41,6 +42,38 @@ if(USE_KUNLUN)
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
endif()

if (USE_BANG)
add_compile_definitions(USE_BANG)
include_directories(src/kernels/mlu/include)

# Neuware Environment
if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME}))
message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env")
elseif (DEFINED NEUWARE_HOME)
set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
else()
set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
endif()
message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")

# cnrt cndrv cnnl
include_directories("${NEUWARE_HOME}/include")
find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64")
find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64")
find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall")

if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH}))
execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE)
set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH")
elseif(DEFINED TARGET_CPU_ARCH)
set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
else()
set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
endif()
message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}")
endif()

add_compile_options(-march=native) # this will cause error in some machine
add_compile_options(-mtune=native)
add_compile_options(-Wall)
@@ -72,4 +105,5 @@ add_subdirectory(src/05computation)
add_subdirectory(src/06frontend)
add_subdirectory(src/07onnx)
add_subdirectory(src/08communication)
add_subdirectory(src/08-01llm)
add_subdirectory(src/09python_ffi)
3 changes: 2 additions & 1 deletion Makefile
@@ -3,13 +3,14 @@
TYPE ?= Debug
CUDA ?= OFF
KUNLUN ?= OFF
BANG ?= OFF

CMAKE_EXTRA =
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=

build:
mkdir -p build
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) $(CMAKE_EXTRA)
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) -DUSE_BANG=$(BANG) $(CMAKE_EXTRA)
make -j -C build

install-python: build
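With the new flag, an MLU build can be driven entirely from make, e.g. `make BANG=ON TYPE=Release` with `NEUWARE_HOME` pointing at the Neuware installation (the flag names come from the diff above; the Release type is just an example).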
2 changes: 1 addition & 1 deletion README.md
@@ -168,7 +168,7 @@ executor = compiler.compile("cuda", "default", []) # -------- compile the model
- [fmt 10.1.1](https://github.com/fmtlib/fmt/releases/tag/10.1.0)
- [fmtlog v2.2.1](https://github.com/MengRao/fmtlog/releases/tag/v2.2.1)
- [googletest v1.14.0](https://github.com/google/googletest/releases/tag/v1.14.0)
- [backward-cpp v1.6](https://github.com/bombela/backward-cpp/releases/tag/v1.6)
- [backward-cpp master](https://github.com/bombela/backward-cpp)
- [result master](https://github.com/willowell/result)
- [abseil-cpp 20230802.1](https://github.com/abseil/abseil-cpp/releases/tag/20230802.1)

3 changes: 1 addition & 2 deletions src/00common/CMakeLists.txt
@@ -11,6 +11,5 @@ file(GLOB_RECURSE COMMON_TEST test/*.cpp)
if(COMMON_TEST)
add_executable(common_test ${COMMON_TEST})
add_test(common_test common_test)
target_link_libraries(common_test common GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(common_test)
target_link_libraries(common_test common GTest::gtest_main Backward::Object)
endif()
6 changes: 3 additions & 3 deletions src/00common/include/common/error_handler.h
@@ -30,9 +30,9 @@ namespace refactor {
std::abort()

#ifndef DISABLE_ASSERT
#define ASSERT(CONDITION, F, ...) \
{ \
if (!(CONDITION)) RUNTIME_ERROR(fmt::format("Assertion: " #F, ##__VA_ARGS__)); \
#define ASSERT(CONDITION, F, ...) \
{ \
if (!(CONDITION)) RUNTIME_ERROR(fmt::format("Assertion: " F, ##__VA_ARGS__)); \
}
#else
#define ASSERT(CONDITION, F)
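The `#F` → `F` change is subtle: `#F` re-stringifies the caller's format-string literal, so escaped quotes survive into the message. A minimal sketch of the difference, assuming fmt is available as pinned in this repo (the values are made up):

#include <fmt/core.h>

int main() {
    int x = 42;
    // old expansion, "Assertion: " #F — the caller's literal is stringified
    // again, so its quotes leak into the output
    fmt::print("{}\n", fmt::format("Assertion: " "\"x = {}\"", x)); // Assertion: "x = 42"
    // new expansion, "Assertion: " F — plain string-literal concatenation
    fmt::print("{}\n", fmt::format("Assertion: " "x = {}", x)); // Assertion: x = 42
    return 0;
}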
3 changes: 2 additions & 1 deletion src/00common/include/common/rc.hpp
@@ -2,6 +2,7 @@
#define RC_HPP

#include <functional>
#include <utility>

namespace refactor {

@@ -18,7 +19,7 @@ namespace refactor {
T *_value;
struct Counter {
size_t strong, weak;
} * _counter;
} *_counter;

Rc(T *ptr, Counter *counter) noexcept
: _value(ptr), _counter(counter) { inc(); }
3 changes: 1 addition & 2 deletions src/01graph_topo/CMakeLists.txt
@@ -11,6 +11,5 @@ file(GLOB_RECURSE GRAPH_TOPO_TEST test/*.cpp)
if(GRAPH_TOPO_TEST)
add_executable(graph_topo_test ${GRAPH_TOPO_TEST})
add_test(graph_topo_test graph_topo_test)
target_link_libraries(graph_topo_test graph_topo GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(graph_topo_test)
target_link_libraries(graph_topo_test graph_topo GTest::gtest_main Backward::Object)
endif()
3 changes: 1 addition & 2 deletions src/02hardware/CMakeLists.txt
@@ -15,6 +15,5 @@ file(GLOB_RECURSE HARDWARE_TEST test/*.cpp)
if(HARDWARE_TEST)
add_executable(hardware_test ${HARDWARE_TEST})
add_test(hardware_test hardware_test)
target_link_libraries(hardware_test hardware GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(hardware_test)
target_link_libraries(hardware_test hardware GTest::gtest_main Backward::Object)
endif()
2 changes: 2 additions & 0 deletions src/02hardware/include/hardware/device.h
@@ -11,6 +11,8 @@ namespace refactor::hardware {
enum class Type : int32_t {
Cpu,
Nvidia,
Mlu,
Kunlun,
};

protected:
19 changes: 19 additions & 0 deletions src/02hardware/include/hardware/devices/mlu.h
@@ -0,0 +1,19 @@
#ifndef HARDWARE_DEVICES_MLU_H
#define HARDWARE_DEVICES_MLU_H

#include "../device.h"

namespace refactor::hardware {

class Mlu final : public Device {
public:
explicit Mlu(int32_t card);
void setContext() const noexcept final;
Type type() const noexcept final {
return Type::Mlu;
}
};

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_H
2 changes: 2 additions & 0 deletions src/02hardware/src/device_manager.cpp
@@ -1,5 +1,6 @@
#include "hardware/device_manager.h"
#include "hardware/devices/cpu.h"
#include "hardware/devices/mlu.h"
#include "hardware/devices/nvidia.h"

namespace refactor::hardware::device {
@@ -37,6 +38,7 @@ namespace refactor::hardware::device {
using T = Device::Type;
// clang-format off
auto device = type == T::Nvidia ? std::make_shared<Nvidia>(card)
: type == T::Mlu ? std::make_shared<Mlu>(card)
: UNREACHABLEX(Arc<Device>, "");
// clang-format on
auto [kind, ok] = DEVICES.try_emplace(static_cast<int32_t>(type));
10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.cc
@@ -5,19 +5,19 @@
namespace refactor::hardware {
using M = CpuMemory;

void *M::malloc(size_t size) noexcept {
void *M::malloc(size_t size) {
return std::malloc(size);
}
void M::free(void *ptr) noexcept {
void M::free(void *ptr) {
std::free(ptr);
}
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
return std::memcpy(dst, src, bytes);
}

10 changes: 5 additions & 5 deletions src/02hardware/src/devices/cpu/memory.hh
@@ -6,11 +6,11 @@
namespace refactor::hardware {

class CpuMemory final : public Memory {
void *malloc(size_t) noexcept final;
void free(void *) noexcept final;
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
void *malloc(size_t) final;
void free(void *) final;
void *copyHD(void *dst, void const *src, size_t bytes) const final;
void *copyDH(void *dst, void const *src, size_t bytes) const final;
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware
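The dropped noexcept across Memory and its CPU implementation looks deliberate: device backends such as MluMemory (below) report failures through BANG_ASSERT, which raises a runtime error, so the shared Memory interface can no longer promise not to throw.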
33 changes: 33 additions & 0 deletions src/02hardware/src/devices/mlu/device.cc
@@ -0,0 +1,33 @@
#include "functions.hh"
#include "hardware/devices/mlu.h"
#include "hardware/mem_pool.h"
#include "memory.hh"

namespace refactor::hardware {

static Arc<Memory> bangMemory(int32_t card) {
#ifdef USE_BANG
ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
setDevice(card);
auto [free, total] = getMemInfo();
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
card, free, total, size);
return std::make_shared<MemPool>(
std::make_shared<MluMemory>(),
size,
256ul);
#else
return nullptr;
#endif
}

Mlu::Mlu(int32_t card) : Device(card, bangMemory(card)) {}

void Mlu::setContext() const noexcept {
#ifdef USE_BANG
setDevice(_card);
#endif
}

}// namespace refactor::hardware
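The pool-size formula in bangMemory takes the larger of 5 GiB and 80% of total device memory, but never more than what is currently free. A worked instance (the 16/12 GiB card figures are hypothetical):

#include <algorithm>
#include <cstddef>

int main() {
    // hypothetical card: 16 GiB total, 12 GiB currently free
    std::size_t free = 12ul << 30, total = 16ul << 30;
    std::size_t size = std::min(free, std::max(5ul << 30, total * 4 / 5));
    // total * 4 / 5 == 12.8 GiB; max(5 GiB, 12.8 GiB) == 12.8 GiB;
    // min(12 GiB, 12.8 GiB) == 12 GiB -> the pool takes all free memory
    return 0;
}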
21 changes: 21 additions & 0 deletions src/02hardware/src/devices/mlu/functions.cc
@@ -0,0 +1,21 @@
#include "functions.hh"

namespace refactor::hardware {

#ifdef USE_BANG
int getDeviceCount() {
unsigned deviceCount;
BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
return static_cast<int>(deviceCount);
}
void setDevice(int device) {
BANG_ASSERT(cnrtSetDevice(device));
}
MemInfo getMemInfo() {
MemInfo memInfo;
BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
return memInfo;
}
#endif

}// namespace refactor::hardware
28 changes: 28 additions & 0 deletions src/02hardware/src/devices/mlu/functions.hh
@@ -0,0 +1,28 @@
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH

#include "common.h"

#ifdef USE_BANG
#include "cnrt.h"

#define BANG_ASSERT(STATUS) \
if (auto status = (STATUS); status != CNRT_RET_SUCCESS) { \
RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})", \
cnrtGetErrorStr(status), (int) status)); \
}
#endif

namespace refactor::hardware {

struct MemInfo {
size_t free, total;
};

int getDeviceCount();
void setDevice(int device);
MemInfo getMemInfo();

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
33 changes: 33 additions & 0 deletions src/02hardware/src/devices/mlu/memory.cc
@@ -0,0 +1,33 @@
#include "memory.hh"
#include "functions.hh"

namespace refactor::hardware {
#ifdef USE_BANG
using M = MluMemory;

void *M::malloc(size_t size) {
void *ptr;
BANG_ASSERT(cnrtMalloc(&ptr, size));
return ptr;
}
void M::free(void *ptr) {
BANG_ASSERT(cnrtFree(ptr));
}
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_HOST2DEV));
return dst;
}
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_DEV2HOST));
return dst;
}
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
BANG_ASSERT(cnrtMemcpy(dst, const_cast<void *>(src), bytes,
CNRT_MEM_TRANS_DIR_PEER2PEER));
return dst;
}
#endif

}// namespace refactor::hardware
18 changes: 18 additions & 0 deletions src/02hardware/src/devices/mlu/memory.hh
@@ -0,0 +1,18 @@
#ifndef HARDWARE_DEVICES_MLU_MEMORY_CUH
#define HARDWARE_DEVICES_MLU_MEMORY_CUH

#include "hardware/memory.h"

namespace refactor::hardware {

class MluMemory final : public Memory {
void *malloc(size_t) final;
void free(void *) final;
void *copyHD(void *dst, void const *src, size_t bytes) const final;
void *copyDH(void *dst, void const *src, size_t bytes) const final;
void *copyDD(void *dst, void const *src, size_t bytes) const final;
};

}// namespace refactor::hardware

#endif// HARDWARE_DEVICES_MLU_MEMORY_CUH
6 changes: 3 additions & 3 deletions src/02hardware/src/devices/nvidia/device.cc
@@ -23,9 +23,9 @@ namespace refactor::hardware {

size_t free, total;
CUDA_ASSERT(cudaMemGetInfo(&free, &total));
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
auto size = free * 9 / 10;
cudaDeviceProp prop;
CUDA_ASSERT(cudaGetDeviceProperties(&prop, 0));
CUDA_ASSERT(cudaGetDeviceProperties(&prop, card));
size_t alignment = prop.textureAlignment;
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}",
card, free, total, size, alignment);
@@ -34,7 +34,7 @@
size,
alignment);
#else
RUNTIME_ERROR("CUDA is not enabled");
return nullptr;
#endif
}

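Besides the new sizing rule (90% of free memory), this hunk carries two behavioral fixes: cudaGetDeviceProperties previously always queried device 0 and now queries `card`, so the alignment comes from the device actually selected; and a build without CUDA now returns nullptr, matching the MLU path, instead of raising a runtime error.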
3 changes: 1 addition & 2 deletions src/03runtime/CMakeLists.txt
@@ -11,6 +11,5 @@ file(GLOB_RECURSE RUNTIME_TEST test/*.cpp)
if(RUNTIME_TEST)
add_executable(runtime_test ${RUNTIME_TEST})
add_test(runtime_test runtime_test)
target_link_libraries(runtime_test runtime GTest::gtest_main ${BACKWARD_ENABLE})
add_backward(runtime_test)
target_link_libraries(runtime_test runtime GTest::gtest_main Backward::Object)
endif()