|
1 | | -# Lux GPU - GPU Acceleration Library |
| 1 | +# Lux GPU Core |
2 | 2 |
|
3 | | -High-performance GPU acceleration for blockchain and ML workloads. |
| 3 | +Lightweight plugin-based GPU acceleration library for blockchain and ML workloads. |
4 | 4 |
|
5 | | -## Supported Backends |
| 5 | +## Architecture |
6 | 6 |
|
7 | | -| Backend | Platform | GPU | Status | |
8 | | -|---------|----------|-----|--------| |
9 | | -| Metal | macOS | Apple Silicon, Intel | Stable | |
10 | | -| CUDA | Linux, Windows | NVIDIA | Stable | |
11 | | -| WebGPU | All | Any WebGPU-compatible | Beta | |
12 | | -| CPU | All | None (fallback) | Stable | |
| 7 | +This is the **core library only**. It provides: |
| 8 | +- **Stable ABI** (`backend_plugin.h`) - Plugin contract |
| 9 | +- **Plugin Loader** - Dynamic loading of backend plugins |
| 10 | +- **CPU Fallback** - Built-in CPU backend for any platform |
| 11 | +- **Tests** - Backend-agnostic test harness |
13 | 12 |
|
14 | | -## Installation |
| 13 | +Backend plugins are built and distributed separately: |
15 | 14 |
|
16 | | -### From Release Binaries |
| 15 | +| Plugin | Repo | Platform | Dependencies | |
| 16 | +|--------|------|----------|--------------| |
| 17 | +| Metal | `luxcpp/metal` | macOS arm64 | MLX, Metal.framework | |
| 18 | +| CUDA | `luxcpp/cuda` | Linux, Windows | CUDA Toolkit, CCCL | |
| 19 | +| WebGPU | `luxcpp/webgpu` | All | Dawn/wgpu, gpu.cpp | |
17 | 20 |
|
18 | | -Download the appropriate package for your platform from [Releases](../../releases). |
| 21 | +## Building |
19 | 22 |
|
20 | 23 | ```bash |
21 | | -# Linux (CUDA) |
22 | | -tar -xzf libaccel-linux-x86_64-cuda.tar.gz |
23 | | -export LUX_GPU_BACKEND_PATH=$PWD/linux-x86_64-cuda |
24 | | - |
25 | | -# macOS (Metal) |
26 | | -tar -xzf libaccel-macos-arm64.tar.gz |
27 | | -export LUX_GPU_BACKEND_PATH=$PWD/macos-arm64 |
28 | | - |
29 | | -# Windows (CUDA) |
30 | | -# Extract zip and add to PATH |
31 | | -``` |
32 | | - |
33 | | -### From Source |
34 | | - |
35 | | -```bash |
36 | | -# Core + Metal (macOS) |
37 | | -cmake -B build -DLUX_GPU_BUILD_METAL=ON |
38 | | -cmake --build build |
39 | | - |
40 | | -# Core + CUDA (Linux with NVIDIA GPU) |
41 | | -cmake -B build -DLUX_GPU_BUILD_CUDA=ON |
| 24 | +# Core only (CPU backend) |
| 25 | +cmake -B build |
42 | 26 | cmake --build build |
43 | 27 |
|
44 | | -# All available backends |
45 | | -cmake -B build -DLUX_GPU_BUILD_ALL_BACKENDS=ON |
46 | | -cmake --build build |
| 28 | +# Run tests |
| 29 | +ctest --test-dir build |
47 | 30 | ``` |
48 | 31 |
|
49 | 32 | ## Usage |
50 | 33 |
|
51 | | -### C API |
52 | | - |
53 | 34 | ```c |
54 | 35 | #include <lux/gpu.h> |
55 | 36 |
|
56 | 37 | int main() { |
57 | | - // Initialize library |
58 | | - if (lux_gpu_init() != LUX_GPU_SUCCESS) { |
59 | | - return 1; |
60 | | - } |
| 38 | + // Initialize (loads best available backend) |
| 39 | + lux_gpu_init(); |
61 | 40 |
|
62 | | - // Check available backends |
63 | | - printf("Backends available: %d\n", lux_gpu_backend_count()); |
| 41 | + // Or specify backend explicitly |
| 42 | + // lux_gpu_set_backend(LUX_BACKEND_CUDA); |
64 | 43 |
|
65 | | - // Create context (auto-selects best backend) |
66 | 44 | LuxContext* ctx = lux_gpu_create_context(-1); |
67 | 45 |
|
68 | | - // Allocate buffers |
69 | | - LuxBuffer* a = lux_gpu_alloc(ctx, 1024 * sizeof(float)); |
70 | | - LuxBuffer* b = lux_gpu_alloc(ctx, 1024 * sizeof(float)); |
71 | | - LuxBuffer* c = lux_gpu_alloc(ctx, 1024 * sizeof(float)); |
72 | | - |
73 | | - // Copy data to GPU |
74 | | - float data[1024]; |
75 | | - lux_gpu_copy_to_device(ctx, a, data, sizeof(data)); |
76 | | - lux_gpu_copy_to_device(ctx, b, data, sizeof(data)); |
77 | | - |
78 | | - // Perform operation |
79 | | - lux_gpu_add_f32(ctx, a, b, c, 1024); |
| 46 | + // Allocate and compute... |
| 47 | + LuxBuffer* buf = lux_gpu_alloc(ctx, 1024 * sizeof(float)); |
80 | 48 |
|
81 | | - // Sync and copy back |
82 | | - lux_gpu_sync(ctx); |
83 | | - lux_gpu_copy_to_host(ctx, c, data, sizeof(data)); |
84 | | - |
85 | | - // Cleanup |
86 | | - lux_gpu_free(ctx, a); |
87 | | - lux_gpu_free(ctx, b); |
88 | | - lux_gpu_free(ctx, c); |
| 49 | + lux_gpu_free(ctx, buf); |
89 | 50 | lux_gpu_destroy_context(ctx); |
90 | 51 | lux_gpu_shutdown(); |
91 | | - |
92 | | - return 0; |
93 | | -} |
94 | | -``` |
95 | | - |
96 | | -### Go Bindings |
97 | | - |
98 | | -```go |
99 | | -import "github.com/luxfi/node/accel" |
100 | | - |
101 | | -func main() { |
102 | | - if err := accel.Init(); err != nil { |
103 | | - log.Fatal(err) |
104 | | - } |
105 | | - defer accel.Shutdown() |
106 | | - |
107 | | - // Check available backends |
108 | | - for _, b := range accel.Backends() { |
109 | | - fmt.Printf("Backend: %s\n", b) |
110 | | - } |
111 | | - |
112 | | - // Create session with auto-detection |
113 | | - session, _ := accel.NewSession() |
114 | | - defer session.Close() |
115 | | - |
116 | | - // Or specify backend |
117 | | - session, _ = accel.NewSessionWithBackend(accel.BackendMetal) |
118 | 52 | } |
119 | 53 | ``` |
120 | 54 |
|
121 | 55 | ## Backend Selection |
122 | 56 |
|
123 | | -### Automatic Selection |
124 | | - |
125 | | -By default, the library selects backends in this priority order: |
| 57 | +At runtime, backends are selected in priority order: |
126 | 58 | 1. **CUDA** - If NVIDIA GPU detected |
127 | | -2. **Metal** - If running on macOS with Apple GPU |
| 59 | +2. **Metal** - If running on macOS arm64 |
128 | 60 | 3. **WebGPU** - Cross-platform fallback |
129 | | -4. **CPU** - Final fallback |
| 61 | +4. **CPU** - Final fallback (always available) |
| 62 | + |
| 63 | +Override via the `LUX_BACKEND` environment variable or the `lux_gpu_set_backend()` API: |
| 64 | +```bash |
| 65 | +export LUX_BACKEND=cuda # or metal, webgpu, cpu |
| 66 | +``` |
130 | 67 |
|
131 | | -### Manual Selection |
| 68 | +## Plugin Loading |
132 | 69 |
|
133 | | -```c |
134 | | -// Environment variable |
135 | | -export LUX_BACKEND=metal # or cuda, webgpu, cpu |
| 70 | +Backends are loaded from: |
| 71 | +1. `LUX_GPU_BACKEND_PATH` environment variable |
| 72 | +2. System library paths (`/usr/lib/lux-gpu`, etc.) |
| 73 | +3. Paths relative to the executable |
136 | 74 |
|
137 | | -// Or via API |
138 | | -lux_gpu_set_backend(LUX_BACKEND_METAL); |
139 | | -``` |
| 75 | +Plugin naming: `libluxgpu_backend_<name>.{so,dylib,dll}` |
140 | 76 |
|
141 | | -### Backend Discovery |
| 77 | +## ABI Stability |
142 | 78 |
|
| 79 | +The plugin ABI is versioned. Plugins must match the core ABI version: |
143 | 80 | ```c |
144 | | -// List available backends |
145 | | -int count = lux_gpu_backend_count(); |
146 | | -for (int i = 0; i < count; i++) { |
147 | | - LuxBackend backend = lux_gpu_get_backend(i); |
148 | | - printf("Backend %d: %s\n", i, lux_gpu_backend_name(backend)); |
149 | | -} |
150 | | -
|
151 | | -// Check specific capabilities |
152 | | -LuxCapabilities caps = lux_gpu_get_capabilities(LUX_BACKEND_METAL); |
153 | | -if (caps & LUX_CAP_MSM) { |
154 | | - printf("MSM supported on Metal\n"); |
155 | | -} |
| 81 | +// backend_plugin.h |
| 82 | +#define LUX_GPU_ABI_VERSION 1 |
156 | 83 | ``` |
157 | 84 |
|
158 | | -## Operations |
159 | | - |
160 | | -### Tensor Operations |
161 | | -- Element-wise: add, sub, mul, div |
162 | | -- Unary: exp, log, sqrt, tanh, sigmoid, relu, gelu |
163 | | -- Matrix: matmul, transpose |
164 | | -- Reductions: sum, max, min, mean |
165 | | -- Normalization: layer_norm, rms_norm |
166 | | -- Activation: softmax, log_softmax |
167 | | - |
168 | | -### Cryptographic Operations |
169 | | -- Curves: BLS12-381, BN254, secp256k1, Ed25519 |
170 | | -- Hashing: Poseidon2, Blake3, SHA256, Keccak |
171 | | -- ZK: NTT/INTT, MSM, polynomial operations |
172 | | -- KZG: commit, open, verify |
173 | | - |
174 | | -### FHE Operations |
175 | | -- TFHE: bootstrap, keyswitch |
176 | | -- Blind rotation |
177 | | -- Sample extraction |
178 | | -- Polynomial multiplication |
179 | | - |
180 | | -## Environment Variables |
181 | | - |
182 | | -| Variable | Description | Example | |
183 | | -|----------|-------------|---------| |
184 | | -| `LUX_BACKEND` | Force specific backend | `metal`, `cuda`, `webgpu`, `cpu` | |
185 | | -| `LUX_GPU_BACKEND_PATH` | Plugin search path | `/usr/local/lib/lux-gpu` | |
186 | | -| `LUX_GPU_DEVICE` | Device index | `0`, `1` | |
187 | | -| `LUX_GPU_DEBUG` | Enable debug logging | `1` | |
188 | | - |
189 | | -## Build Options |
190 | | - |
191 | | -| Option | Default | Description | |
192 | | -|--------|---------|-------------| |
193 | | -| `LUX_GPU_BUILD_METAL` | OFF | Build Metal backend | |
194 | | -| `LUX_GPU_BUILD_CUDA` | OFF | Build CUDA backend | |
195 | | -| `LUX_GPU_BUILD_WEBGPU` | OFF | Build WebGPU backend | |
196 | | -| `LUX_GPU_BUILD_ALL_BACKENDS` | OFF | Auto-detect and build all | |
197 | | -| `LUX_GPU_BUILD_TESTS` | ON | Build test suite | |
198 | | -| `LUX_GPU_BUILD_BENCHMARKS` | OFF | Build benchmarks | |
199 | | -| `LUX_GPU_EMBED_KERNELS` | ON | Embed kernel source in plugins | |
200 | | - |
201 | 85 | ## License |
202 | 86 |
|
203 | | -BSD-3-Clause-Eco - See LICENSE file. |
| 87 | +BSD-3-Clause-Eco |
0 commit comments