Skip to content

Commit f470bc3

Browse files
xctan and ggerganov authored
ggml-cpu : split arch-specific implementations (#13892)
* move ggml-cpu-aarch64 to repack
* split quantize_row_q8_0/1
* split helper functions
* split ggml_vec_dot_q4_0_q8_0
* split ggml_vec_dot_q4_1_q8_1
* split ggml_vec_dot_q5_0_q8_0
* split ggml_vec_dot_q5_1_q8_1
* split ggml_vec_dot_q8_0_q8_0
* split ggml_vec_dot_tq1_0_q8_K
* split ggml_vec_dot_tq2_0_q8_K
* split ggml_vec_dot_q2_K_q8_K
* split ggml_vec_dot_q3_K_q8_K
* split ggml_vec_dot_q4_K_q8_K
* split ggml_vec_dot_q5_K_q8_K
* split ggml_vec_dot_q6_K_q8_K
* split ggml_vec_dot_iq2_xxs_q8_K
* split ggml_vec_dot_iq2_xs_q8_K
* split ggml_vec_dot_iq2_s_q8_K
* split ggml_vec_dot_iq3_xxs_q8_K
* split ggml_vec_dot_iq3_s_q8_K
* split ggml_vec_dot_iq1_s_q8_K
* split ggml_vec_dot_iq1_m_q8_K
* split ggml_vec_dot_iq4_nl_q8_0
* split ggml_vec_dot_iq4_xs_q8_K
* fix typos
* fix missing prototypes
* rename ggml-cpu-quants.c
* rename ggml-cpu-traits
* rename arm folder
* move cpu-feats-x86.cpp
* rename ggml-cpu-hbm
* update arm detection macro in quants.c
* move iq quant tables
* split ggml_quantize_mat_q8_0/K
* split ggml_gemv_*
* split ggml_gemm_*
* rename namespace aarch64 to repack
* use weak aliases to replace test macros
* rename GGML_CPU_AARCH64 to GGML_CPU_REPACK
* rename more aarch64 to repack
* clean up rebase leftover
* fix compilation errors
* remove trailing spaces
* try to fix clang compilation errors
* try to fix clang compilation errors again
* try to fix clang compilation errors, 3rd attempt
* try to fix clang compilation errors, 4th attempt
* try to fix clang compilation errors, 5th attempt
* try to fix clang compilation errors, 6th attempt
* try to fix clang compilation errors, 7th attempt
* try to fix clang compilation errors, 8th attempt
* try to fix clang compilation errors, 9th attempt
* more cleanup
* fix compilation errors
* fix apple targets
* fix a typo in arm version of ggml_vec_dot_q4_K_q8_K

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 8f47e25 commit f470bc3

36 files changed

+24327
-17206
lines changed

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
367367
endif
368368

369369
ifndef GGML_NO_CPU_AARCH64
370-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
370+
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
371371
endif
372372

373373
# warnings
@@ -970,7 +970,7 @@ OBJ_GGML = \
970970
$(DIR_GGML)/src/ggml-threading.o \
971971
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
972972
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
973-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
973+
$(DIR_GGML)/src/ggml-cpu/repack.o \
974974
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
975975
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
976976
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \

ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
105105
message(DEBUG "INS_ENB : ${INS_ENB}")
106106

107107
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
108-
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
108+
option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
109109
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
110110
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
111111
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})

ggml/src/ggml-common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
10741074
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
10751075
GGML_TABLE_END()
10761076

1077+
GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
1078+
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
1079+
GGML_TABLE_END()
1080+
10771081
#define NGRID_IQ1S 2048
10781082
#define IQ1S_DELTA 0.125f
10791083
#define IQ1M_DELTA 0.125f

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
1010
list (APPEND GGML_CPU_SOURCES
1111
ggml-cpu/ggml-cpu.c
1212
ggml-cpu/ggml-cpu.cpp
13-
ggml-cpu/ggml-cpu-aarch64.cpp
14-
ggml-cpu/ggml-cpu-aarch64.h
15-
ggml-cpu/ggml-cpu-hbm.cpp
16-
ggml-cpu/ggml-cpu-hbm.h
17-
ggml-cpu/ggml-cpu-quants.c
18-
ggml-cpu/ggml-cpu-quants.h
19-
ggml-cpu/ggml-cpu-traits.cpp
20-
ggml-cpu/ggml-cpu-traits.h
13+
ggml-cpu/repack.cpp
14+
ggml-cpu/repack.h
15+
ggml-cpu/hbm.cpp
16+
ggml-cpu/hbm.h
17+
ggml-cpu/quants.c
18+
ggml-cpu/quants.h
19+
ggml-cpu/traits.cpp
20+
ggml-cpu/traits.h
2121
ggml-cpu/amx/amx.cpp
2222
ggml-cpu/amx/amx.h
2323
ggml-cpu/amx/mmq.cpp
@@ -84,6 +84,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
8484

8585
if (GGML_SYSTEM_ARCH STREQUAL "ARM")
8686
message(STATUS "ARM detected")
87+
list(APPEND GGML_CPU_SOURCES
88+
ggml-cpu/arch/arm/quants.c
89+
ggml-cpu/arch/arm/repack.cpp
90+
)
91+
8792
if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
8893
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
8994
else()
@@ -167,6 +172,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
167172
endif()
168173
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
169174
message(STATUS "x86 detected")
175+
list(APPEND GGML_CPU_SOURCES
176+
ggml-cpu/arch/x86/quants.c
177+
ggml-cpu/arch/x86/repack.cpp
178+
)
179+
170180
if (MSVC)
171181
# instruction set detection for MSVC only
172182
if (GGML_NATIVE)
@@ -302,7 +312,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
302312
# Since multiple variants of the CPU backend may be included in the same
303313
# build, using set_source_files_properties() to set the arch flags is not possible
304314
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
305-
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
315+
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
306316
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
307317
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
308318
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
@@ -311,6 +321,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
311321
endif()
312322
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
313323
message(STATUS "PowerPC detected")
324+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
314325
if (GGML_NATIVE)
315326
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
316327
file(READ "/proc/cpuinfo" POWER10_M)
@@ -338,6 +349,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
338349
endif()
339350
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
340351
message(STATUS "loongarch64 detected")
352+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)
353+
341354
list(APPEND ARCH_FLAGS -march=loongarch64)
342355
if (GGML_LASX)
343356
list(APPEND ARCH_FLAGS -mlasx)
@@ -347,6 +360,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
347360
endif()
348361
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
349362
message(STATUS "riscv64 detected")
363+
list(APPEND GGML_CPU_SOURCES
364+
ggml-cpu/arch/riscv/quants.c
365+
ggml-cpu/arch/riscv/repack.cpp
366+
)
350367
if (GGML_RVV)
351368
if (GGML_XTHEADVECTOR)
352369
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
@@ -358,6 +375,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
358375
endif()
359376
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
360377
message(STATUS "s390x detected")
378+
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
361379
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
362380
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})
363381

@@ -381,12 +399,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
381399
if (GGML_VXE)
382400
list(APPEND ARCH_FLAGS -mvx -mzvector)
383401
endif()
402+
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
403+
message(STATUS "Wasm detected")
404+
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
384405
else()
385-
message(STATUS "Unknown architecture")
406+
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
407+
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
386408
endif()
387409

388-
if (GGML_CPU_AARCH64)
389-
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
410+
if (GGML_CPU_REPACK)
411+
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
390412
endif()
391413

392414
if (GGML_CPU_KLEIDIAI)

ggml/src/ggml-cpu/amx/amx.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#include "ggml-backend.h"
66
#include "ggml-impl.h"
77
#include "ggml-cpu.h"
8-
#include "ggml-cpu-traits.h"
8+
#include "traits.h"
99

1010
#if defined(__gnu_linux__)
1111
#include <sys/syscall.h>

ggml/src/ggml-cpu/amx/mmq.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include "mmq.h"
99
#include "ggml-impl.h"
1010
#include "ggml-cpu-impl.h"
11-
#include "ggml-cpu-quants.h"
11+
#include "quants.h"
1212
#include "ggml-quants.h"
1313
#include <algorithm>
1414
#include <type_traits>

0 commit comments

Comments
 (0)