Skip to content

Commit cf49428

Browse files
committed
ggml-backend : fix async copy from CPU
ggml-ci
1 parent 1e6f655 commit cf49428

File tree

1 file changed

+10
-10
lines changed

1 file changed

+10
-10
lines changed

ggml/src/ggml-backend.c

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -351,15 +351,10 @@ void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t b
351351
}
352352

353353
// an async copy would normally happen after all the queued operations on both backends are completed
354-
// sync src, set_async dst
355-
if (ggml_backend_buffer_is_host(src->buffer)) {
356-
ggml_backend_synchronize(backend_src);
357-
ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
358-
} else {
359-
ggml_backend_synchronize(backend_src);
360-
ggml_backend_tensor_copy(src, dst);
361-
ggml_backend_synchronize(backend_dst);
362-
}
354+
// to simulate the same behavior, we need to synchronize both backends first, and do a blocking copy
355+
ggml_backend_synchronize(backend_src);
356+
ggml_backend_synchronize(backend_dst);
357+
ggml_backend_tensor_copy(src, dst);
363358
}
364359

365360
// events
@@ -1782,7 +1777,12 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
17821777
} else {
17831778
ggml_backend_synchronize(split_backend);
17841779
}
1785-
ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
1780+
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1781+
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
1782+
if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1783+
ggml_backend_synchronize(input_backend);
1784+
ggml_backend_tensor_copy(input, input_cpy);
1785+
}
17861786
}
17871787
}
17881788

0 commit comments

Comments
 (0)