Skip to content

ggml-cpu : split arch-specific implementations #13892

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 56 commits into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
6814bd4
move ggml-cpu-aarch64 to repack
xctan May 28, 2025
a07340a
split quantize_row_q8_0/1
xctan May 28, 2025
82d7410
split helper functions
xctan May 28, 2025
ead5762
split ggml_vec_dot_q4_0_q8_0
xctan May 28, 2025
627e1ec
split ggml_vec_dot_q4_1_q8_1
xctan May 28, 2025
9582518
split ggml_vec_dot_q5_0_q8_0
xctan May 28, 2025
beca219
split ggml_vec_dot_q5_1_q8_1
xctan May 28, 2025
a32715a
split ggml_vec_dot_q8_0_q8_0
xctan May 28, 2025
a46eca7
split ggml_vec_dot_tq1_0_q8_K
xctan May 28, 2025
96a7f51
split ggml_vec_dot_tq2_0_q8_K
xctan May 28, 2025
5f881c9
split ggml_vec_dot_q2_K_q8_K
xctan May 28, 2025
91fbf27
split ggml_vec_dot_q3_K_q8_K
xctan May 28, 2025
58b6c62
split ggml_vec_dot_q4_K_q8_K
xctan May 28, 2025
6272e0c
split ggml_vec_dot_q5_K_q8_K
xctan May 28, 2025
7c7223f
split ggml_vec_dot_q6_K_q8_K
xctan May 28, 2025
9671c0e
split ggml_vec_dot_iq2_xxs_q8_K
xctan May 28, 2025
e4e1cfc
split ggml_vec_dot_iq2_xs_q8_K
xctan May 28, 2025
c9efc9e
split ggml_vec_dot_iq2_s_q8_K
xctan May 28, 2025
d1d2e24
split ggml_vec_dot_iq3_xxs_q8_K
xctan May 28, 2025
da6fcec
split ggml_vec_dot_iq3_s_q8_K
xctan May 28, 2025
3334b10
split ggml_vec_dot_iq1_s_q8_K
xctan May 28, 2025
93f0c4f
split ggml_vec_dot_iq1_m_q8_K
xctan May 29, 2025
3f4866f
split ggml_vec_dot_iq4_nl_q8_0
xctan May 29, 2025
740b3c9
split ggml_vec_dot_iq4_xs_q8_K
xctan May 29, 2025
9487b76
fix typos
xctan May 29, 2025
88e7e42
fix missing prototypes
xctan May 29, 2025
2252aa2
rename ggml-cpu-quants.c
xctan May 29, 2025
6df3dd5
rename ggml-cpu-traits
xctan May 29, 2025
3566ee8
rename arm folder
xctan May 29, 2025
f40ad8c
move cpu-feats-x86.cpp
xctan May 29, 2025
1ac2d5e
rename ggml-cpu-hbm
xctan May 29, 2025
321b3ac
update arm detection macro in quants.c
xctan May 29, 2025
7b5bf50
move iq quant tables
xctan May 29, 2025
bf3dbea
split ggml_quantize_mat_q8_0/K
xctan May 29, 2025
868c895
split ggml_gemv_*
xctan May 29, 2025
6a2ba77
split ggml_gemm_*
xctan May 29, 2025
72ddf5a
rename namespace aarch64 to repack
xctan May 29, 2025
ad52349
use weak aliases to replace test macros
xctan May 29, 2025
62dc3fd
rename GGML_CPU_AARCH64 to GGML_CPU_REPACK
xctan May 29, 2025
46b1e49
rename more aarch64 to repack
xctan May 29, 2025
5601df6
clean up rebase leftover
xctan May 29, 2025
827aec0
fix compilation errors
xctan May 29, 2025
58210b8
remove trailing spaces
xctan May 29, 2025
2739f4c
try to fix clang compilation errors
xctan May 29, 2025
8713f87
try to fix clang compilation errors again
xctan May 29, 2025
df27810
try to fix clang compilation errors, 3rd attempt
xctan May 29, 2025
553d8ca
try to fix clang compilation errors, 4th attempt
xctan May 29, 2025
9bfcd7e
try to fix clang compilation errors, 5th attempt
xctan May 29, 2025
08ebdd9
try to fix clang compilation errors, 6th attempt
xctan May 30, 2025
67eceec
try to fix clang compilation errors, 7th attempt
xctan May 30, 2025
01a1c5c
try to fix clang compilation errors, 8th attempt
xctan May 30, 2025
bef5b8d
try to fix clang compilation errors, 9th attempt
xctan May 30, 2025
47701d5
more cleanup
xctan Jun 4, 2025
e5b6fdb
fix compilation errors
xctan Jun 4, 2025
2573662
fix apple targets
xctan Jun 4, 2025
93e6718
fix a typo in arm version of ggml_vec_dot_q4_K_q8_K
xctan Jun 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
endif

ifndef GGML_NO_CPU_AARCH64
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
endif

# warnings
Expand Down Expand Up @@ -970,7 +970,7 @@ OBJ_GGML = \
$(DIR_GGML)/src/ggml-threading.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
$(DIR_GGML)/src/ggml-cpu/repack.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
Expand Down
2 changes: 1 addition & 1 deletion ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
message(DEBUG "INS_ENB : ${INS_ENB}")

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_CPU_REPACK "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)
option(GGML_SSE42 "ggml: enable SSE 4.2" ${INS_ENB})
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
Expand Down
4 changes: 4 additions & 0 deletions ggml/src/ggml-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
GGML_TABLE_END()

GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
GGML_TABLE_END()

#define NGRID_IQ1S 2048
#define IQ1S_DELTA 0.125f
#define IQ1M_DELTA 0.125f
Expand Down
46 changes: 34 additions & 12 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
list (APPEND GGML_CPU_SOURCES
ggml-cpu/ggml-cpu.c
ggml-cpu/ggml-cpu.cpp
ggml-cpu/ggml-cpu-aarch64.cpp
ggml-cpu/ggml-cpu-aarch64.h
ggml-cpu/ggml-cpu-hbm.cpp
ggml-cpu/ggml-cpu-hbm.h
ggml-cpu/ggml-cpu-quants.c
ggml-cpu/ggml-cpu-quants.h
ggml-cpu/ggml-cpu-traits.cpp
ggml-cpu/ggml-cpu-traits.h
ggml-cpu/repack.cpp
ggml-cpu/repack.h
ggml-cpu/hbm.cpp
ggml-cpu/hbm.h
ggml-cpu/quants.c
ggml-cpu/quants.h
ggml-cpu/traits.cpp
ggml-cpu/traits.h
ggml-cpu/amx/amx.cpp
ggml-cpu/amx/amx.h
ggml-cpu/amx/mmq.cpp
Expand Down Expand Up @@ -84,6 +84,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

if (GGML_SYSTEM_ARCH STREQUAL "ARM")
message(STATUS "ARM detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/arm/quants.c
ggml-cpu/arch/arm/repack.cpp
)

if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
else()
Expand Down Expand Up @@ -167,6 +172,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "x86")
message(STATUS "x86 detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/x86/quants.c
ggml-cpu/arch/x86/repack.cpp
)

if (MSVC)
# instruction set detection for MSVC only
if (GGML_NATIVE)
Expand Down Expand Up @@ -302,7 +312,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
# Since multiple variants of the CPU backend may be included in the same
# build, using set_source_files_properties() to set the arch flags is not possible
set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/x86/cpu-feats.cpp)
target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
Expand All @@ -311,6 +321,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
message(STATUS "PowerPC detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/powerpc/quants.c)
if (GGML_NATIVE)
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
file(READ "/proc/cpuinfo" POWER10_M)
Expand Down Expand Up @@ -338,6 +349,8 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "loongarch64")
message(STATUS "loongarch64 detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/loongarch/quants.c)

list(APPEND ARCH_FLAGS -march=loongarch64)
if (GGML_LASX)
list(APPEND ARCH_FLAGS -mlasx)
Expand All @@ -347,6 +360,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "riscv64")
message(STATUS "riscv64 detected")
list(APPEND GGML_CPU_SOURCES
ggml-cpu/arch/riscv/quants.c
ggml-cpu/arch/riscv/repack.cpp
)
if (GGML_RVV)
if (GGML_XTHEADVECTOR)
list(APPEND ARCH_FLAGS -march=rv64gc_xtheadvector -mabi=lp64d)
Expand All @@ -358,6 +375,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
endif()
elseif (GGML_SYSTEM_ARCH STREQUAL "s390x")
message(STATUS "s390x detected")
list(APPEND GGML_CPU_SOURCES ggml-cpu/arch/s390/quants.c)
file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M ${CPUINFO_CONTENTS})

Expand All @@ -381,12 +399,16 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
if (GGML_VXE)
list(APPEND ARCH_FLAGS -mvx -mzvector)
endif()
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm")
message(STATUS "Wasm detected")
list (APPEND GGML_CPU_SOURCES ggml-cpu/arch/wasm/quants.c)
else()
message(STATUS "Unknown architecture")
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
endif()

if (GGML_CPU_AARCH64)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
if (GGML_CPU_REPACK)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK)
endif()

if (GGML_CPU_KLEIDIAI)
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/amx/amx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "ggml-backend.h"
#include "ggml-impl.h"
#include "ggml-cpu.h"
#include "ggml-cpu-traits.h"
#include "traits.h"

#if defined(__gnu_linux__)
#include <sys/syscall.h>
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cpu/amx/mmq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "mmq.h"
#include "ggml-impl.h"
#include "ggml-cpu-impl.h"
#include "ggml-cpu-quants.h"
#include "quants.h"
#include "ggml-quants.h"
#include <algorithm>
#include <type_traits>
Expand Down
Loading
Loading