-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' into dev-mlu-runtime
- Loading branch information
Showing
129 changed files
with
2,127 additions
and
407 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Submodule backward-cpp
updated
7 files
+1 −0 | .gitignore | |
+55 −32 | BackwardConfig.cmake | |
+54 −17 | CMakeLists.txt | |
+57 −23 | README.md | |
+79 −47 | backward.hpp | |
+2 −2 | test/suicide.cpp | |
+1 −1 | test_package/CMakeLists.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,50 @@ | ||
#include "functions.cuh" | ||
#include "hardware/devices/nvidia.h" | ||
#include "hardware/mem_pool.h" | ||
#include "memory.cuh" | ||
|
||
#ifdef USE_CUDA | ||
#include "memory.hh" | ||
#include <cuda_runtime.h> | ||
|
||
// Evaluates the CUDA runtime call STATUS once and, when the result is not
// cudaSuccess, raises RUNTIME_ERROR with the stringified call, the text from
// cudaGetErrorString and the numeric status code.
//
// The body is wrapped in do { ... } while (0) so that `CUDA_ASSERT(x);` is a
// single statement: a bare `if` here would capture a following `else` when the
// macro is used inside an unbraced if/else.
#define CUDA_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != cudaSuccess) {                             \
            RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cudaGetErrorString(status), (int) status));        \
        }                                                                                \
    } while (0)
#endif | ||
|
||
namespace refactor::hardware { | ||
|
||
/// Builds the memory object backing Nvidia GPU `card`.
///
/// With USE_CUDA defined, this validates `card` against the CUDA device count,
/// selects it with cudaSetDevice, and returns a MemPool over NvidiaMemory whose
/// size is 9/10 of the device memory currently reported free and whose
/// alignment is the selected device's `textureAlignment`. Any failing CUDA
/// runtime call is reported through CUDA_ASSERT; an out-of-range `card` is
/// reported through ASSERT.
///
/// Without USE_CUDA, returns nullptr.
static Arc<Memory> cudaMemory(int32_t card) {
#ifdef USE_CUDA
    int deviceCount = 0;
    CUDA_ASSERT(cudaGetDeviceCount(&deviceCount));
    ASSERT(0 <= card && card < deviceCount, "Invalid card id: {}", card);
    CUDA_ASSERT(cudaSetDevice(card));

    size_t free = 0, total = 0;
    CUDA_ASSERT(cudaMemGetInfo(&free, &total));
    auto size = free * 9 / 10;

    // Query the card that was just selected, not device 0: on a host with
    // different GPU models the alignment may differ from card to card.
    cudaDeviceProp prop;
    CUDA_ASSERT(cudaGetDeviceProperties(&prop, card));
    size_t alignment = prop.textureAlignment;

    fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}",
                 card, free, total, size, alignment);
    return std::make_shared<MemPool>(
        std::make_shared<NvidiaMemory>(),
        size,
        alignment);
#else
    return nullptr;
#endif
}
|
||
/// Constructs the device for `card`; its memory is whatever cudaMemory(card)
/// returns.
Nvidia::Nvidia(int32_t card)
    : Device(card, cudaMemory(card)) {}
|
||
/// Selects this device's card via cudaSetDevice(_card).
///
/// Not noexcept: a failing cudaSetDevice is reported through CUDA_ASSERT.
/// Without USE_CUDA this function does nothing.
void Nvidia::setContext() const {
#ifdef USE_CUDA
    CUDA_ASSERT(cudaSetDevice(_card));
#endif
}
|
||
}// namespace refactor::hardware |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#ifndef KERNEL_HARD_SIGMOID_H
#define KERNEL_HARD_SIGMOID_H

#include "../collector.h"

namespace refactor::kernel {

/// Kernel collector for the hard-sigmoid operator.
///
/// Stores the two float parameters of the operator and declares `filter`,
/// which is defined outside this header.
/// NOTE(review): alpha/beta are presumably the slope and offset of
/// clamp(alpha * x + beta, 0, 1) — confirm against the kernel implementation.
struct HardSigmoidCollector final : public InfoCollector {
    float alpha, beta;

    /// Forwards `target` to InfoCollector and stores both parameters.
    constexpr HardSigmoidCollector(decltype(_target) target, float alpha_, float beta_) noexcept
        : InfoCollector(target), alpha(alpha_), beta(beta_) {}

    /// Returns the kernel boxes for the given input and output tensors.
    std::vector<KernelBox>
    filter(TensorRefs inputs, TensorRefs outputs) const final;
};

}// namespace refactor::kernel

#endif// KERNEL_HARD_SIGMOID_H
|
20 changes: 20 additions & 0 deletions
20
src/04kernel/include/kernel/collectors/rms_normalization.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#ifndef KERNEL_RMS_NORMALIZATION_H | ||
#define KERNEL_RMS_NORMALIZATION_H | ||
|
||
#include "../collector.h" | ||
|
||
namespace refactor::kernel { | ||
|
||
struct RmsNormalizationCollector final : public InfoCollector { | ||
float epsilon; | ||
|
||
constexpr RmsNormalizationCollector(decltype(_target) target, float epsilon_) noexcept | ||
: InfoCollector(target), epsilon(epsilon_) {} | ||
|
||
std::vector<KernelBox> | ||
filter(TensorRefs inputs, TensorRefs outputs) const final; | ||
}; | ||
|
||
}// namespace refactor::kernel | ||
|
||
#endif// KERNEL_RMS_NORMALIZATION_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.