Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,13 @@ if(BUILD_WITH_SHARED_LIBS)
message(STATUS "Notice: Building with shared libraries.")
list(APPEND 3RDPART_LIB_LIST "xdnn")
if(WITH_GPU)
list(APPEND 3RDPART_LIB_LIST "gpudnn")
list(APPEND 3RDPART_LIB_LIST "gpu-dnn")
endif()
else()
message(STATUS "Notice: Building with static libraries.")
list(APPEND 3RDPART_LIB_LIST "xdnn_static")
if(WITH_GPU)
list(APPEND 3RDPART_LIB_LIST "gpudnn_static")
list(APPEND 3RDPART_LIB_LIST "gpu-dnn")
endif()
endif()

Expand Down
2 changes: 1 addition & 1 deletion cmake/gpudnn.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ include(ExternalProject)
# cmake-format: off
ExternalProject_Add(gpudnn_lib
URL https://github.com/intel/xFasterTransformer/releases/download/gpuDNN/gpudnn_v0.1.tar.gz
URL_HASH MD5=05b3554413e454ed027014e44a5c7fe4
URL_HASH MD5=7082ae7dd35e5209ef8a2779526ff0d5
TIMEOUT 60
SOURCE_DIR ${CMAKE_SOURCE_DIR}/3rdparty/gpudnn
CONFIGURE_COMMAND ""
Expand Down
1 change: 0 additions & 1 deletion include/abstract_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class AbstractDecoder {
// Return the decoding result, split offset, and split size
// The returned result is a split representing the possibilities of next token, like the shadow part in below graph
// splitOffset
// \
// \|<-splitSize->|
// _ ___________________________v______________________________________
// ^ | | ||||||||||||||| | |
Expand Down
2 changes: 1 addition & 1 deletion src/common/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static inline void *alloc(size_t nbytes, size_t alignment = 64) {

int err = posix_memalign(&data, alignment, nbytes);
if (err != 0) {
printf("Unable to allocate buffer with size of %lld, err=%d\n", nbytes, err);
printf("Unable to allocate buffer with size of %zu, err=%d\n", nbytes, err);
exit(-1);
}

Expand Down
2 changes: 1 addition & 1 deletion src/utils/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class Env {
#ifdef PIPELINE_PARALLEL
int value = atoi(xft_pipeline_value);
if (value >= 1)
pipelineStageValue() = value;
pipelineStageValue = value;
else
printf("[ERROR] XFT_PIPELINE_STAGE value need to be greater than 0.\n");
#else
Expand Down
12 changes: 10 additions & 2 deletions src/utils/matmul_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1413,7 +1413,7 @@ class MMHelper {
break;
}
default:
printf(">>> onednn amx postAlg type %s not supported.", std::to_string(postAlg));
printf(">>> onednn amx postAlg type %s not supported.", std::to_string(postAlg).c_str());
exit(-1);
break;
}
Expand Down Expand Up @@ -2063,11 +2063,18 @@ class MMHelper {
return _mm512_mul_ps(v, vres);
};
auto silu = [](__m512 &v, int row, int col) {
__m512 vone = _mm512_set1_ps(1.0f);
const __m512 vone = _mm512_set1_ps(1.0f);
__m512 vp = BertUtil::vexp(v);
__m512 vrecip = _mm512_rcp14_ps(vp + vone);
return vp * vrecip * v;
};
auto gelu = [](__m512 &v, int row, int col) {
const __m512 vone = _mm512_set1_ps(1.0f);
const __m512 c1 = _mm512_set1_ps(1.702f);
__m512 vp = BertUtil::vexp(v * c1);
__m512 vrecip = _mm512_rcp14_ps(vp + vone);
return vp * vrecip * v;
};

switch (kind) {
case matmul_kinds::Basic: dequant_base(M, N, C_int32, ldc_int32, C, ldc, dequant_op, no_post_op); break;
Expand All @@ -2076,6 +2083,7 @@ class MMHelper {
dequant_base(M, N, C_int32, ldc_int32, C, ldc, dequant_op, biasadd_relu);
break;
case matmul_kinds::Silu: dequant_base(M, N, C_int32, ldc_int32, C, ldc, dequant_op, silu); break;
case matmul_kinds::Gelu: dequant_base(M, N, C_int32, ldc_int32, C, ldc, dequant_op, gelu); break;
case matmul_kinds::Resmul: dequant_base(M, N, C_int32, ldc_int32, C, ldc, dequant_op, resmul); break;
case matmul_kinds::Residential:
if (bias) {
Expand Down