
Commit c201d0d

reeselevine and ngxson authored
Wasm (#9)
* webgpu : fix build on emscripten
* more debugging stuff
* test-backend-ops: force single thread on wasm
* fix single-thread case for init_tensor_uniform
* use jspi
* add pthread
* test: remember to set n_thread for cpu backend
* Add buffer label and enable dawn-specific toggles to turn off some checks
* Intermediate state
* Fast working f16/f32 vec4
* Working float fast mul mat
* Clean up naming of mul_mat to match logical model, start work on q mul_mat
* Setup for subgroup matrix mat mul
* Basic working subgroup matrix
* Working subgroup matrix tiling
* Handle weirder sg matrix sizes (but still % sg matrix size)
* Working start to gemv
* working f16 accumulation with shared memory staging
* Print out available subgroup matrix configurations
* Vectorize dst stores for sg matrix shader
* Gemv working scalar
* Minor set_rows optimization (#4)
  * updated optimization, fixed errors
  * non vectorized version now dispatches one thread per element
  * Simplify
  * Change logic for set_rows pipelines
  ---------
  Co-authored-by: Neha Abbas <nehaabbas@macbookpro.lan>
  Co-authored-by: Neha Abbas <nehaabbas@ReeseLevines-MacBook-Pro.local>
  Co-authored-by: Reese Levine <reeselevine1@gmail.com>
* Comment on dawn toggles
* Working subgroup matrix code for (semi)generic sizes
* Remove some comments
* Cleanup code
* Update dawn version and move to portable subgroup size
* Try to fix new dawn release
* Update subgroup size comment
* Only check for subgroup matrix configs if they are supported
* Add toggles for subgroup matrix/f16 support on nvidia+vulkan
* Make row/col naming consistent
* Refactor shared memory loading
* Move sg matrix stores to correct file
* Working q4_0
* Formatting
* Work with emscripten builds
* Fix test-backend-ops emscripten for f16/quantized types
* Use emscripten memory64 to support get_memory
* Add build flags and try ci

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
1 parent 7c2b2ef commit c201d0d

File tree

10 files changed: +348, -139 lines


.github/workflows/build.yml

Lines changed: 40 additions & 0 deletions
@@ -547,6 +547,46 @@ jobs:
           # This is using llvmpipe and runs slower than other backends
           ctest -L main --verbose --timeout 3600
 
+  ubuntu-24-wasm-webgpu:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: ggml-org/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-wasm-webgpu
+          evict-old-files: 1d
+
+      - name: Install Emscripten
+        run: |
+          git clone https://github.com/emscripten-core/emsdk.git
+          cd emsdk
+          ./emsdk install latest
+          ./emsdk activate latest
+
+      - name: Fetch emdawnwebgpu
+        run: |
+          DAWN_TAG="v20251027.212519"
+          EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
+          echo "Downloading ${EMDAWN_PKG}"
+          curl -L -o emdawn.zip \
+            "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
+          unzip emdawn.zip
+
+      - name: Build WASM WebGPU
+        run: |
+          source emsdk/emsdk_env.sh
+          emcmake cmake -B build-wasm \
+            -DGGML_WEBGPU=ON \
+            -DLLAMA_CURL=OFF \
+            -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
+
+          cmake --build build-wasm --target test-backend-ops -j $(nproc)
+
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
     container: rocm/dev-ubuntu-22.04:6.1.2

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -152,3 +152,5 @@ poetry.toml
 # IDE
 *.code-workspace
 .windsurf/
+# emscripten
+a.out.*

CMakeLists.txt

Lines changed: 11 additions & 1 deletion
@@ -36,7 +36,17 @@ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
 if (EMSCRIPTEN)
     set(BUILD_SHARED_LIBS_DEFAULT OFF)
 
-    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+    # Use 64-bit memory to support backend_get_memory queries
+    # TODO: analyze performance impact, see https://spidermonkey.dev/blog/2025/01/15/is-memory64-actually-worth-using
+    add_compile_options("-sMEMORY64=1")
+    add_link_options("-sMEMORY64=1")
+    add_link_options("-sALLOW_MEMORY_GROWTH=1")
+
+    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+    option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+    if (LLAMA_BUILD_HTML)
+        set(CMAKE_EXECUTABLE_SUFFIX ".html")
+    endif()
 else()
     if (MINGW)
         set(BUILD_SHARED_LIBS_DEFAULT OFF)
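
Note: the commit message ties -sMEMORY64 to "Use emscripten memory64 to support get_memory". Below is a minimal, hypothetical C++ sketch (not part of this commit) of why the wider size_t matters for those queries, assuming the public ggml_backend_dev_count() / ggml_backend_dev_get() / ggml_backend_dev_memory() API:

// Hypothetical illustration only: report per-device memory via the ggml
// backend API. On a 32-bit wasm target size_t is 4 bytes, so totals above
// 4 GiB would truncate; with -sMEMORY64=1 size_t is 8 bytes and the values
// survive intact.
#include <cstdio>

#include "ggml-backend.h"

int main() {
    for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
        size_t free_mem  = 0;
        size_t total_mem = 0;
        ggml_backend_dev_memory(ggml_backend_dev_get(i), &free_mem, &total_mem);
        printf("device %zu: free %zu / total %zu bytes (sizeof(size_t) = %zu)\n",
               i, free_mem, total_mem, sizeof(size_t));
    }
    return 0;
}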

common/arg.cpp

Lines changed: 4 additions & 0 deletions
@@ -39,6 +39,7 @@
 #include "http.h"
 #endif
 
+#ifndef __EMSCRIPTEN__
 #ifdef __linux__
 #include <linux/limits.h>
 #elif defined(_WIN32)
@@ -50,8 +51,11 @@
 #else
 #include <sys/syslimits.h>
 #endif
+#endif
+
 #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 
+
 // isatty
 #if defined(_WIN32)
 #include <io.h>
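
Since the new guard is split across the two hunks above, this is how the block reads after the change (a reconstruction from the hunks; the Windows branch in between is unchanged in this diff and abbreviated):

#ifndef __EMSCRIPTEN__   // added: the platform limits headers are skipped under Emscripten
#ifdef __linux__
#include <linux/limits.h>
#elif defined(_WIN32)
// ... unchanged Windows includes, not shown in this diff ...
#else
#include <sys/syslimits.h>
#endif
#endif                   // added: closes the __EMSCRIPTEN__ guard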

common/common.cpp

Lines changed: 2 additions & 0 deletions
@@ -889,6 +889,8 @@ std::string fs_get_cache_directory() {
         cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
 #elif defined(_WIN32)
         cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(__EMSCRIPTEN__)
+        GGML_ABORT("not implemented on this platform");
 #else
 # error Unknown architecture
 #endif

ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -224,7 +224,7 @@ option(GGML_WEBGPU "ggml: use WebGPU"
 option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
 option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
 option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
-
+option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
 option(GGML_ZDNN "ggml: use zDNN" OFF)
 option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
 option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 20 additions & 2 deletions
@@ -39,15 +39,33 @@ add_dependencies(ggml-webgpu generate_shaders)
 if(EMSCRIPTEN)
     set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")
 
-    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    if(NOT EMDAWNWEBGPU_DIR)
+        # default built-in port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
+    else()
+        # custom port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    endif()
+
+    if (GGML_WEBGPU_JSPI)
+        target_compile_options(ggml-webgpu PRIVATE "-fwasm-exceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sJSPI" "-fwasm-exceptions")
+    else()
+        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sASYNCIFY" "-exceptions")
+    endif()
 else()
     find_package(Dawn REQUIRED)
     set(DawnWebGPU_TARGET dawn::webgpu_dawn)
 endif()
 
 if (GGML_WEBGPU_DEBUG)
     target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
+    if(EMSCRIPTEN)
+        target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2")
+    endif()
 endif()
 
 if (GGML_WEBGPU_CPU_PROFILE)
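
Side note on the exception flags above: -fwasm-exceptions (paired with -sJSPI) uses native wasm exception handling, while -fexceptions (paired with -sASYNCIFY) falls back to Emscripten's JS-based emulation. A tiny, hypothetical C++ example, not taken from the backend, of the kind of code that needs one of these modes enabled in a wasm build:

#include <cstdio>
#include <stdexcept>

// Any throw/catch compiled to wasm needs exception support: either native
// wasm exceptions (-fwasm-exceptions) or the JS-emulated path (-fexceptions).
int main() {
    try {
        throw std::runtime_error("webgpu init failed");
    } catch (const std::exception & e) {
        printf("caught: %s\n", e.what());
    }
    return 0;
}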
