Skip to content

Commit

Permalink
perf: make ggml_conv_2d faster
Browse files: browse the repository at this point in the history
  • Loading branch information
leejet committed Aug 26, 2023
1 parent 008d80a commit d765b95
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ggml
Submodule ggml updated 2 files
+5 −3 include/ggml/ggml.h
+402 −89 src/ggml.c
12 changes: 12 additions & 0 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
@@ -3256,6 +3256,10 @@ class StableDiffusionGGML {
}
}
}

#ifdef GGML_PERF
ggml_graph_print(&diffusion_graph);
#endif
int64_t t1 = ggml_time_ms();
LOG_INFO("step %d sampling completed, taking %.2fs", i + 1, (t1 - t0) * 1.0f / 1000);
LOG_DEBUG("diffusion graph use %.2fMB runtime memory: static %.2fMB, dynamic %.2fMB",
@@ -3345,6 +3349,10 @@ class StableDiffusionGGML {
int64_t t0 = ggml_time_ms();
ggml_graph_compute_with_ctx(ctx, &vae_graph, n_threads);
int64_t t1 = ggml_time_ms();

#ifdef GGML_PERF
ggml_graph_print(&vae_graph);
#endif
LOG_DEBUG("computing vae graph completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);

result = ggml_dup_tensor(res_ctx, moments);
@@ -3470,6 +3478,10 @@ class StableDiffusionGGML {
int64_t t0 = ggml_time_ms();
ggml_graph_compute_with_ctx(ctx, &vae_graph, n_threads);
int64_t t1 = ggml_time_ms();

#ifdef GGML_PERF
ggml_graph_print(&vae_graph);
#endif
LOG_DEBUG("computing vae graph completed, taking %.2fs", (t1 - t0) * 1.0f / 1000);

result_img = ggml_dup_tensor(res_ctx, img);
Expand Down

0 comments on commit d765b95

Please sign in to comment.