@@ -7,12 +7,12 @@ This enables matching up devices and information reported by the backend
77with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
88---
99 ggml/include/ggml-backend.h | 1 +
10- ggml/src/ggml-cuda/ggml-cuda.cu | 39 ++++++++++++++++++++++++++++++++
10+ ggml/src/ggml-cuda/ggml-cuda.cu | 67 +++++++++++++++++++++++++++++---
1111 ggml/src/ggml-metal/ggml-metal.m | 1 +
12- 3 files changed, 41 insertions(+)
12+ 3 files changed, 63 insertions(+), 6 deletions(-)
1313
1414diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
15- index 74e46716..48839339 100644
15+ index 74e467163..48839339d 100644
1616--- a/ggml/include/ggml-backend.h
1717+++ b/ggml/include/ggml-backend.h
1818@@ -152,6 +152,7 @@ extern "C" {
@@ -24,18 +24,101 @@ index 74e46716..48839339 100644
2424 size_t memory_total;
2525 enum ggml_backend_dev_type type;
2626diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
27- index cb0d8528..d6960174 100644
27+ index cb0d8528d..1492368de 100644
2828--- a/ggml/src/ggml-cuda/ggml-cuda.cu
2929+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
30- @@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
30+ @@ -173,6 +173,51 @@ static int ggml_cuda_parse_id(char devName[]) {
31+ }
32+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
33+
34+ + static std::string ggml_cuda_parse_uuid(cudaDeviceProp prop, int device_num) {
35+ + char id[64];
36+ +
37+ + #if !defined(GGML_USE_HIP)
38+ + snprintf(id, sizeof(id),
39+ + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
40+ + (unsigned char)prop.uuid.bytes[0],
41+ + (unsigned char)prop.uuid.bytes[1],
42+ + (unsigned char)prop.uuid.bytes[2],
43+ + (unsigned char)prop.uuid.bytes[3],
44+ + (unsigned char)prop.uuid.bytes[4],
45+ + (unsigned char)prop.uuid.bytes[5],
46+ + (unsigned char)prop.uuid.bytes[6],
47+ + (unsigned char)prop.uuid.bytes[7],
48+ + (unsigned char)prop.uuid.bytes[8],
49+ + (unsigned char)prop.uuid.bytes[9],
50+ + (unsigned char)prop.uuid.bytes[10],
51+ + (unsigned char)prop.uuid.bytes[11],
52+ + (unsigned char)prop.uuid.bytes[12],
53+ + (unsigned char)prop.uuid.bytes[13],
54+ + (unsigned char)prop.uuid.bytes[14],
55+ + (unsigned char)prop.uuid.bytes[15]
56+ + );
57+ + #else
58+ + #ifdef _WIN32
59+ + snprintf(id, sizeof(id), "%d", device_num);
60+ + #else
61+ + try {
62+ + std::string uuid = std::string(prop.uuid.bytes, 16);
63+ +
64+ + size_t pos = 0;
65+ + unsigned long long v = stoull(uuid, &pos, 16);
66+ + if (v == 0 || pos != uuid.size() || (!uuid.empty() && uuid[0] == '-'))
67+ + throw std::invalid_argument("invalid uuid");
68+ +
69+ + snprintf(id, sizeof(id), "GPU-%016llx", v);
70+ + } catch (const std::exception &e) {
71+ + snprintf(id, sizeof(id), "%d", device_num);
72+ + }
73+ + #endif
74+ + #endif
75+ +
76+ + return id;
77+ + }
78+ +
79+ static ggml_cuda_device_info ggml_cuda_init() {
80+ #ifdef __HIP_PLATFORM_AMD__
81+ // Workaround for a rocBLAS bug when using multiple graphics cards:
82+ @@ -261,22 +306,24 @@ static ggml_cuda_device_info ggml_cuda_init() {
83+ info.devices[id].cc += prop.minor * 0x10;
84+ }
85+ }
86+ - GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d\n",
87+ + GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s, Wave Size: %d, ID: %s\n",
88+ id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff,
89+ - device_vmm ? "yes" : "no", prop.warpSize);
90+ + device_vmm ? "yes" : "no", prop.warpSize, ggml_cuda_parse_uuid(prop, id).c_str());
91+ #elif defined(GGML_USE_MUSA)
92+ // FIXME: Ensure compatibility with varying warp sizes across different MUSA archs.
93+ info.devices[id].warp_size = 32;
94+ info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
95+ info.devices[id].cc = GGML_CUDA_CC_OFFSET_MTHREADS + prop.major * 0x100;
96+ info.devices[id].cc += prop.minor * 0x10;
97+ - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
98+ - id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
99+ + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
100+ + id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
101+ + ggml_cuda_parse_uuid(prop, id).c_str());
102+ #else
103+ info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
104+ info.devices[id].cc = 100*prop.major + 10*prop.minor;
105+ - GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
106+ - id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
107+ + GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s, ID: %s\n",
108+ + id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no",
109+ + ggml_cuda_parse_uuid(prop, id).c_str());
110+ #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
111+ }
112+
113+ @@ -2884,6 +2931,7 @@ struct ggml_backend_cuda_device_context {
31114 int device;
32115 std::string name;
33116 std::string description;
34117+ std::string id;
35118 };
36119
37120 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
38- @@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
121+ @@ -2896,6 +2944,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
39122 return ctx->description.c_str();
40123 }
41124
@@ -47,55 +130,24 @@ index cb0d8528..d6960174 100644
47130 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
48131 ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
49132 ggml_cuda_set_device(ctx->device);
50- @@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
133+ @@ -2910,6 +2963,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
51134 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
52135 props->name = ggml_backend_cuda_device_get_name(dev);
53136 props->description = ggml_backend_cuda_device_get_description(dev);
54137+ props->id = ggml_backend_cuda_device_get_id(dev);
55138 props->type = ggml_backend_cuda_device_get_type(dev);
56139 ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
57140
58- @@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
141+ @@ -3457,6 +3511,7 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
142+ cudaDeviceProp prop;
59143 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
60144 dev_ctx->description = prop.name;
145+ + dev_ctx->id = ggml_cuda_parse_uuid(prop, i);
61146
62- + #if !defined(GGML_USE_HIP)
63- + char id[64];
64- + snprintf(id, sizeof(id),
65- + "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
66- + (unsigned char)prop.uuid.bytes[0],
67- + (unsigned char)prop.uuid.bytes[1],
68- + (unsigned char)prop.uuid.bytes[2],
69- + (unsigned char)prop.uuid.bytes[3],
70- + (unsigned char)prop.uuid.bytes[4],
71- + (unsigned char)prop.uuid.bytes[5],
72- + (unsigned char)prop.uuid.bytes[6],
73- + (unsigned char)prop.uuid.bytes[7],
74- + (unsigned char)prop.uuid.bytes[8],
75- + (unsigned char)prop.uuid.bytes[9],
76- + (unsigned char)prop.uuid.bytes[10],
77- + (unsigned char)prop.uuid.bytes[11],
78- + (unsigned char)prop.uuid.bytes[12],
79- + (unsigned char)prop.uuid.bytes[13],
80- + (unsigned char)prop.uuid.bytes[14],
81- + (unsigned char)prop.uuid.bytes[15]
82- + );
83- + dev_ctx->id = id;
84- + #else
85- + #ifdef _WIN32
86- + char id[16];
87- + snprintf(id, sizeof(id), "%d", i);
88- + dev_ctx->id = id;
89- + #else
90- + dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
91- + #endif
92- + #endif
93- +
94147 ggml_backend_dev_t dev = new ggml_backend_device {
95148 /* .iface = */ ggml_backend_cuda_device_interface,
96- /* .reg = */ &reg,
97149diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
98- index 1b56f858..a9eeebc6 100644
150+ index 1b56f858c..a9eeebc6a 100644
99151--- a/ggml/src/ggml-metal/ggml-metal.m
100152+++ b/ggml/src/ggml-metal/ggml-metal.m
101153@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
0 commit comments