Skip to content

Commit 977ab65

Browse files
committed
vulkan: sort graph to allow more parallel execution
Add a backend proc that allows a backend to modify the graph. The Vulkan implementation looks at which nodes depend on each other and greedily reorders them so that nodes that don't depend on each other are grouped together. It only reorders the nodes; it does not change the contents of any of them. With #15489, this reduces the number of synchronizations needed.
1 parent c4df49a commit 977ab65

File tree

13 files changed

+162
-0
lines changed

13 files changed

+162
-0
lines changed

ggml/src/ggml-backend-impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ extern "C" {
114114
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
115115
// wait for an event on a different stream
116116
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
117+
118+
// (optional) sort/optimize the nodes in the graph
119+
void (*optimize_graph) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
117120
};
118121

119122
struct ggml_backend {

ggml/src/ggml-backend.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,13 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
463463
backend->iface.event_wait(backend, event);
464464
}
465465

466+
// Give a single backend the chance to sort/optimize the nodes of a graph.
//
// backend: must be non-NULL (enforced with GGML_ASSERT).
// cgraph:  forwarded as-is to the backend's optimize_graph hook; this wrapper
//          does not inspect or modify it itself.
//
// The optimize_graph entry in the backend interface is optional: when it is
// NULL this function does nothing.
static void ggml_backend_optimize_graph(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
467+
GGML_ASSERT(backend);
468+
if (backend->iface.optimize_graph != NULL) { // hook is optional; skip backends that do not provide it
469+
backend->iface.optimize_graph(backend, cgraph);
470+
}
471+
}
472+
466473
// Backend device
467474

468475
const char * ggml_backend_dev_name(ggml_backend_dev_t device) {
@@ -1702,6 +1709,16 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
17021709
return true;
17031710
}
17041711

1712+
// Offer the whole graph to every backend held by the scheduler, in index
// order (0 .. sched->n_backends - 1), by calling ggml_backend_optimize_graph
// on each of them with the same graph pointer.
//
// sched: must be non-NULL (enforced with GGML_ASSERT).
// graph: the unsplit graph; any changes are made by the backends' hooks, not here.
static void ggml_backend_sched_optimize_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
1713+
GGML_ASSERT(sched);
1714+
// Run through each backend, before splitting, giving a chance to optimize.
1715+
// Would be better to have each backend optimize its own split, but sched->graph
1716+
// gets out of sync.
1717+
for (int i = 0; i < sched->n_backends; i++) {
1718+
ggml_backend_optimize_graph(sched->backends[i], graph); // no-op for backends without the hook
1719+
}
1720+
}
1721+
17051722
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
17061723
GGML_ASSERT(sched);
17071724
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
@@ -1710,6 +1727,8 @@ bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgra
17101727
sched->cur_copy = sched->next_copy;
17111728
sched->next_copy = (sched->next_copy + 1) % sched->n_copies;
17121729

1730+
ggml_backend_sched_optimize_graph(sched, graph);
1731+
17131732
ggml_backend_sched_split_graph(sched, graph);
17141733

17151734
if (!ggml_backend_sched_alloc_splits(sched)) {

ggml/src/ggml-blas/ggml-blas.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ static struct ggml_backend_i blas_backend_i = {
270270
/* .graph_compute = */ ggml_backend_blas_graph_compute,
271271
/* .event_record = */ NULL,
272272
/* .event_wait = */ NULL,
273+
/* .optimize_graph = */ NULL,
273274
};
274275

275276
static ggml_guid_t ggml_backend_blas_guid(void) {

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2689,6 +2689,7 @@ static const ggml_backend_i ggml_backend_cann_interface = {
26892689
/* .graph_compute = */ ggml_backend_cann_graph_compute,
26902690
/* .event_record = */ ggml_backend_cann_event_record,
26912691
/* .event_wait = */ ggml_backend_cann_event_wait,
2692+
/* .optimize_graph = */ NULL,
26922693
};
26932694

26942695
/**

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ static const struct ggml_backend_i ggml_backend_cpu_i = {
190190
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
191191
/* .event_record = */ NULL,
192192
/* .event_wait = */ NULL,
193+
/* .optimize_graph = */ NULL,
193194
};
194195

195196
static ggml_guid_t ggml_backend_cpu_guid(void) {

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3135,6 +3135,7 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
31353135
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
31363136
/* .event_record = */ ggml_backend_cuda_event_record,
31373137
/* .event_wait = */ ggml_backend_cuda_event_wait,
3138+
/* .optimize_graph = */ NULL,
31383139
};
31393140

31403141
static ggml_guid_t ggml_backend_cuda_guid() {

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6513,6 +6513,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
65136513
/* .graph_compute = */ ggml_backend_metal_graph_compute,
65146514
/* .event_record = */ NULL,
65156515
/* .event_wait = */ NULL,
6516+
/* .optimize_graph = */ NULL,
65166517
};
65176518

65186519
static ggml_guid_t ggml_backend_metal_guid(void) {

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2838,6 +2838,7 @@ static ggml_backend_i ggml_backend_opencl_i = {
28382838
/* .graph_compute = */ ggml_backend_opencl_graph_compute,
28392839
/* .event_record = */ NULL,
28402840
/* .event_wait = */ NULL,
2841+
/* .optimize_graph = */ NULL,
28412842
};
28422843

28432844
ggml_backend_t ggml_backend_opencl_init(void) {

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,7 @@ static ggml_backend_i ggml_backend_rpc_interface = {
795795
/* .graph_compute = */ ggml_backend_rpc_graph_compute,
796796
/* .event_record = */ NULL,
797797
/* .event_wait = */ NULL,
798+
/* .optimize_graph = */ NULL,
798799
};
799800

800801
ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4063,6 +4063,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
40634063
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
40644064
/* .event_record = */ ggml_backend_sycl_event_record,
40654065
/* .event_wait = */ ggml_backend_sycl_event_wait,
4066+
/* .optimize_graph = */ NULL,
40664067
};
40674068

40684069
static ggml_guid_t ggml_backend_sycl_guid() {

0 commit comments

Comments
 (0)