-
Notifications
You must be signed in to change notification settings - Fork 9.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support multiple GPUs (split mode) on SYCL backend #5806
Changes from 6 commits
f87da8e
33563a8
4c29df3
47a572d
6b01068
5db8896
e4cc412
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,18 +125,15 @@ static std::string get_gpu_info() { | |
#ifdef GGML_USE_SYCL | ||
int device_list[GGML_SYCL_MAX_DEVICES]; | ||
ggml_sycl_get_gpu_list(device_list, GGML_SYCL_MAX_DEVICES); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this can be removed now. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, remove it. |
||
|
||
for (int i = 0; i < GGML_SYCL_MAX_DEVICES; i++) { | ||
if (device_list[i] >0 ){ | ||
char buf[128]; | ||
ggml_sycl_get_device_description(i, buf, sizeof(buf)); | ||
id += buf; | ||
int count = ggml_backend_sycl_get_device_count(); | ||
for (int i = 0; i < count; i++) { | ||
char buf[128]; | ||
ggml_sycl_get_device_description(i, buf, sizeof(buf)); | ||
id += buf; | ||
if (i < count - 1) { | ||
id += "/"; | ||
} | ||
} | ||
if (id.length() >2 ) { | ||
id.pop_back(); | ||
} | ||
#endif | ||
// TODO: other backends | ||
return id; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,19 @@ INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" | |
source /opt/intel/oneapi/setvars.sh | ||
|
||
if [ $# -gt 0 ]; then | ||
export GGML_SYCL_DEVICE=$1 | ||
GGML_SYCL_DEVICE=$1 | ||
else | ||
export GGML_SYCL_DEVICE=0 | ||
GGML_SYCL_DEVICE=0 | ||
fi | ||
echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE | ||
echo "use $GGML_SYCL_DEVICE as main GPU" | ||
#export GGML_SYCL_DEBUG=1 | ||
./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 | ||
#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0 | ||
|
||
|
||
#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. | ||
|
||
#use all GPUs with same max compute units | ||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. yes, rm -mg |
||
|
||
#use main GPU only | ||
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, accept it.