Skip to content

Commit e9a1be0

Browse files
committed
CUDA: Simplify and improve CUDA graphs through use of indirect copy pointers
Previously there was complexity in the CUDA graphs implementation due to frequently changing parameters to copy kernels associated with K and V cache pointers. This patch simplifies the implementation by using indirection to avoid such parameters frequently changing, avoiding the need for frequent graph updates. Fixes #12152
1 parent ba76543 commit e9a1be0

File tree

4 files changed

+96
-107
lines changed

4 files changed

+96
-107
lines changed

ggml/include/ggml-backend.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,9 @@ extern "C" {
349349
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
350350
GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
351351

352+
// Copy the destination pointers used by copy operations to the backend
353+
GGML_API void ggml_backend_dest_ptrs_copy(char ** host_dest_ptrs, const int host_dest_ptrs_size);
354+
352355
#ifdef __cplusplus
353356
}
354357
#endif

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ struct ggml_cuda_graph {
711711
bool disable_due_to_failed_graph_capture = false;
712712
int number_consecutive_updates = 0;
713713
std::vector<ggml_graph_node_properties> ggml_graph_properties;
714-
std::vector<char **> updated_kernel_arg;
714+
std::vector<char *> cpy_dest_ptrs;
715715
#endif
716716
};
717717

0 commit comments

Comments
 (0)