Skip to content

Commit aa63205

Browse files
jessegross authored and rick-github committed
ggml: Use ordinal IDs for AMD GPUs on Linux when UUID is unavailable
Some AMD GPUs do not provide a UUID and report only "XX" instead. In these cases, we use the device's ordinal ID as an alternate identifier. This matches what we already have to do for AMD GPUs on Windows, where the ordinal ID is always used. In addition, the ID of each GPU is now printed when the devices are enumerated, to make future debugging easier.
1 parent 898b15a commit aa63205

File tree

2 files changed

+148
-80
lines changed

2 files changed

+148
-80
lines changed

llama/patches/0017-ggml-Export-GPU-UUIDs.patch

Lines changed: 94 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend
77
with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
88
---
99
ggml/include/ggml-backend.h | 1 +
10-
ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++
10+
ggml/src/ggml-cuda/ggml-cuda.cu | 67 +++++++++++++++++++++++++++++---
1111
ggml/src/ggml-metal/ggml-metal.m | 1 +
12-
3 files changed, 41 insertions(+)
12+
3 files changed, 63 insertions(+), 6 deletions(-)
1313

1414
diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
15-
index 74e46716..48839339 100644
15+
index 74e467163..48839339d 100644
1616
--- a/ggml/include/ggml-backend.h
1717
+++ b/ggml/include/ggml-backend.h
1818
@@ -152,6 +152,7 @@ extern "C" {
@@ -24,18 +24,101 @@ index 74e46716..48839339 100644
2424
size_t memory_total;
2525
enum ggml_backend_dev_type type;
2626
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
27-
index cb0d8528..d6960174 100644
27+
index cb0d8528d..1492368de 100644
2828
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
2929
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
30-
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
30+
@@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
31+
}
32+
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
33+
34+
+static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
35+
+ char id[64];
36+
+
37+
+ #if !defined(GGML_USE_HIP)
38+
+ snprintf(id, sizeof(id),
39+
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
40+
+ (unsigned char)prop.uuid.bytes[0],
41+
+ (unsigned char)prop.uuid.bytes[1],
42+
+ (unsigned char)prop.uuid.bytes[2],
43+
+ (unsigned char)prop.uuid.bytes[3],
44+
+ (unsigned char)prop.uuid.bytes[4],
45+
+ (unsigned char)prop.uuid.bytes[5],
46+
+ (unsigned char)prop.uuid.bytes[6],
47+
+ (unsigned char)prop.uuid.bytes[7],
48+
+ (unsigned char)prop.uuid.bytes[8],
49+
+ (unsigned char)prop.uuid.bytes[9],
50+
+ (unsigned char)prop.uuid.bytes[10],
51+
+ (unsigned char)prop.uuid.bytes[11],
52+
+ (unsigned char)prop.uuid.bytes[12],
53+
+ (unsigned char)prop.uuid.bytes[13],
54+
+ (unsigned char)prop.uuid.bytes[14],
55+
+ (unsigned char)prop.uuid.bytes[15]
56+
+ );
57+
+ #else
58+
+ #ifdef _WIN32
59+
+ snprintf(id, sizeof(id), "%d", device_num);
60+
+ #else
61+
+ try {
62+
+ std::string uuid = std::string(prop.uuid.bytes, 16);
63+
+
64+
+ size_t pos = 0;
65+
+ unsigned long long v = stoull(uuid, &pos, 16);
66+
+ if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
67+
+ throw std::invalid_argument("invalid uuid");
68+
+
69+
+ snprintf(id, sizeof(id), "GPU-%016llx", v);
70+
+ } catch (const std::exception &e) {
71+
+ snprintf(id, sizeof(id), "%d", device_num);
72+
+ }
73+
+ #endif
74+
+ #endif
75+
+
76+
+ return id;
77+
+}
78+
+
79+
static ggml_cuda_device_info ggml_cuda_init() {
80+
#ifdef __HIP_PLATFORM_AMD__
81+
// Workaround for a rocBLAS bug when using multiple graphics cards:
82+
@@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
83+
info.devices[id].cc += prop.minor * 0x10;
84+
}
85+
}
86+
- GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
87+
+ GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
88+
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
89+
- device_vmm ? "yes" : "no", prop.warpSize);
90+
+ device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
91+
#elif defined(GGML_USE_MUSA)
92+
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
93+
info.devices[id].warp_size = 32;
94+
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
95+
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
96+
info.devices[id].cc += prop.minor * 0x10;
97+
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
98+
- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
99+
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
100+
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
101+
+ ggml_cuda_parse_uuid(prop, id).c_str());
102+
#else
103+
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
104+
info.devices[id].cc = 100*prop.major + 10*prop.minor;
105+
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
106+
- id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
107+
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
108+
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
109+
+ ggml_cuda_parse_uuid(prop, id).c_str());
110+
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
111+
}
112+
113+
@@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
31114
int device;
32115
std::string name;
33116
std::string description;
34117
+ std::string id;
35118
};
36119

37120
static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
38-
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
121+
@@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
39122
return ctx->description.c_str();
40123
}
41124

@@ -47,55 +130,24 @@ index cb0d8528..d6960174 100644
47130
static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
48131
ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
49132
ggml_cuda_set_device(ctx->device);
50-
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
133+
@@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
51134
static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
52135
props->name = ggml_backend_cuda_device_get_name(dev);
53136
props->description = ggml_backend_cuda_device_get_description(dev);
54137
+ props->id = ggml_backend_cuda_device_get_id(dev);
55138
props->type = ggml_backend_cuda_device_get_type(dev);
56139
ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
57140

58-
@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
141+
@@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
142+
cudaDeviceProp prop;
59143
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
60144
dev_ctx->description = prop.name;
145+
+ dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
61146

62-
+ #if !defined(GGML_USE_HIP)
63-
+ char id[64];
64-
+ snprintf(id, sizeof(id),
65-
+ "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
66-
+ (unsigned char)prop.uuid.bytes[0],
67-
+ (unsigned char)prop.uuid.bytes[1],
68-
+ (unsigned char)prop.uuid.bytes[2],
69-
+ (unsigned char)prop.uuid.bytes[3],
70-
+ (unsigned char)prop.uuid.bytes[4],
71-
+ (unsigned char)prop.uuid.bytes[5],
72-
+ (unsigned char)prop.uuid.bytes[6],
73-
+ (unsigned char)prop.uuid.bytes[7],
74-
+ (unsigned char)prop.uuid.bytes[8],
75-
+ (unsigned char)prop.uuid.bytes[9],
76-
+ (unsigned char)prop.uuid.bytes[10],
77-
+ (unsigned char)prop.uuid.bytes[11],
78-
+ (unsigned char)prop.uuid.bytes[12],
79-
+ (unsigned char)prop.uuid.bytes[13],
80-
+ (unsigned char)prop.uuid.bytes[14],
81-
+ (unsigned char)prop.uuid.bytes[15]
82-
+ );
83-
+ dev_ctx->id = id;
84-
+ #else
85-
+ #ifdef _WIN32
86-
+ char id[16];
87-
+ snprintf(id, sizeof(id), "%d", i);
88-
+ dev_ctx->id = id;
89-
+ #else
90-
+ dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
91-
+ #endif
92-
+ #endif
93-
+
94147
ggml_backend_dev_t dev = new ggml_backend_device {
95148
/* .iface = */ ggml_backend_cuda_device_interface,
96-
/* .reg = */ &reg,
97149
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
98-
index 1b56f858..a9eeebc6 100644
150+
index 1b56f858c..a9eeebc6a 100644
99151
--- a/ggml/src/ggml-metal/ggml-metal.m
100152
+++ b/ggml/src/ggml-metal/ggml-metal.m
101153
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen

ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,51 @@ static int ggml_cuda_parse_id(char devName[]) {
175175
}
176176
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
177177

178+
static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
179+
char id[64];
180+
181+
#if !defined(GGML_USE_HIP)
182+
snprintf(id, sizeof(id),
183+
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
184+
(unsigned char)prop.uuid.bytes[0],
185+
(unsigned char)prop.uuid.bytes[1],
186+
(unsigned char)prop.uuid.bytes[2],
187+
(unsigned char)prop.uuid.bytes[3],
188+
(unsigned char)prop.uuid.bytes[4],
189+
(unsigned char)prop.uuid.bytes[5],
190+
(unsigned char)prop.uuid.bytes[6],
191+
(unsigned char)prop.uuid.bytes[7],
192+
(unsigned char)prop.uuid.bytes[8],
193+
(unsigned char)prop.uuid.bytes[9],
194+
(unsigned char)prop.uuid.bytes[10],
195+
(unsigned char)prop.uuid.bytes[11],
196+
(unsigned char)prop.uuid.bytes[12],
197+
(unsigned char)prop.uuid.bytes[13],
198+
(unsigned char)prop.uuid.bytes[14],
199+
(unsigned char)prop.uuid.bytes[15]
200+
);
201+
#else
202+
#ifdef _WIN32
203+
snprintf(id, sizeof(id), "%d", device_num);
204+
#else
205+
try {
206+
std::string uuid = std::string(prop.uuid.bytes, 16);
207+
208+
size_t pos = 0;
209+
unsigned long long v = stoull(uuid, &pos, 16);
210+
if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
211+
throw std::invalid_argument("invalid uuid");
212+
213+
snprintf(id, sizeof(id), "GPU-%016llx", v);
214+
} catch (const std::exception &e) {
215+
snprintf(id, sizeof(id), "%d", device_num);
216+
}
217+
#endif
218+
#endif
219+
220+
return id;
221+
}
222+
178223
static ggml_cuda_device_info ggml_cuda_init() {
179224
#ifdef __HIP_PLATFORM_AMD__
180225
// Workaround for a rocBLAS bug when using multiple graphics cards:
@@ -263,22 +308,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
263308
info.devices[id].cc += prop.minor * 0x10;
264309
}
265310
}
266-
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
311+
GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
267312
id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
268-
device_vmm ? "yes" : "no", prop.warpSize);
313+
device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
269314
#elif defined(GGML_USE_MUSA)
270315
// FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
271316
info.devices[id].warp_size = 32;
272317
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
273318
info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
274319
info.devices[id].cc += prop.minor * 0x10;
275-
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
276-
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
320+
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
321+
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
322+
ggml_cuda_parse_uuid(prop, id).c_str());
277323
#else
278324
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
279325
info.devices[id].cc = 100*prop.major + 10*prop.minor;
280-
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
281-
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
326+
GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
327+
id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
328+
ggml_cuda_parse_uuid(prop, id).c_str());
282329
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
283330
}
284331

@@ -3475,38 +3522,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
34753522
cudaDeviceProp prop;
34763523
CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
34773524
dev_ctx->description = prop.name;
3478-
3479-
#if !defined(GGML_USE_HIP)
3480-
char id[64];
3481-
snprintf(id, sizeof(id),
3482-
"GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
3483-
(unsigned char)prop.uuid.bytes[0],
3484-
(unsigned char)prop.uuid.bytes[1],
3485-
(unsigned char)prop.uuid.bytes[2],
3486-
(unsigned char)prop.uuid.bytes[3],
3487-
(unsigned char)prop.uuid.bytes[4],
3488-
(unsigned char)prop.uuid.bytes[5],
3489-
(unsigned char)prop.uuid.bytes[6],
3490-
(unsigned char)prop.uuid.bytes[7],
3491-
(unsigned char)prop.uuid.bytes[8],
3492-
(unsigned char)prop.uuid.bytes[9],
3493-
(unsigned char)prop.uuid.bytes[10],
3494-
(unsigned char)prop.uuid.bytes[11],
3495-
(unsigned char)prop.uuid.bytes[12],
3496-
(unsigned char)prop.uuid.bytes[13],
3497-
(unsigned char)prop.uuid.bytes[14],
3498-
(unsigned char)prop.uuid.bytes[15]
3499-
);
3500-
dev_ctx->id = id;
3501-
#else
3502-
#ifdef _WIN32
3503-
char id[16];
3504-
snprintf(id, sizeof(id), "%d", i);
3505-
dev_ctx->id = id;
3506-
#else
3507-
dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
3508-
#endif
3509-
#endif
3525+
dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
35103526

35113527
ggml_backend_dev_t dev = new ggml_backend_device {
35123528
/* .iface = */ ggml_backend_cuda_device_interface,

0 commit comments

Comments
 (0)