|  | 
| 64 | 64 |     bool use_fusion; | 
| 65 | 65 |     bool use_concurrency; | 
| 66 | 66 |     bool use_shared_buffers; | 
|  | 67 | +    bool use_graph_optimize; | 
| 67 | 68 | 
 | 
| 68 | 69 |     int debug_graph; | 
| 69 | 70 |     int debug_fusion; | 
|  | 
| 88 | 89 |     /*.use_fusion              =*/ true, | 
| 89 | 90 |     /*.use_concurrency         =*/ true, | 
| 90 | 91 |     /*.use_shared_buffers      =*/ true, | 
|  | 92 | +    /*.use_graph_optimize      =*/ true, | 
| 91 | 93 |     /*.debug_graph             =*/ 0, | 
| 92 | 94 |     /*.debug_fusion            =*/ 0, | 
| 93 | 95 |     /*.fuse_cnt                =*/ { 0 }, | 
|  | 
| 149 | 151 |                 ctx->use_shared_buffers = false; | 
| 150 | 152 |             } | 
| 151 | 153 | 
 | 
|  | 154 | +            ctx->use_graph_optimize = true; | 
|  | 155 | + | 
|  | 156 | +            if (getenv("GGML_METAL_GRAPH_OPTIMIZE_DISABLE") != NULL) { | 
|  | 157 | +                ctx->use_graph_optimize = false; | 
|  | 158 | +            } | 
|  | 159 | + | 
| 152 | 160 |             memset(ctx->fuse_cnt, 0, sizeof(ctx->fuse_cnt)); | 
| 153 | 161 | 
 | 
| 154 | 162 |             ctx->max_size = ctx->mtl_device.maxBufferLength; | 
| @@ -1105,6 +1113,7 @@ @implementation GGMLMetalClass | 
| 1105 | 1113 |     GGML_LOG_INFO("%s: use fusion            = %s\n", __func__, ctx_dev->use_fusion                  ? "true" : "false"); | 
| 1106 | 1114 |     GGML_LOG_INFO("%s: use concurrency       = %s\n", __func__, ctx_dev->use_concurrency             ? "true" : "false"); | 
| 1107 | 1115 |     GGML_LOG_INFO("%s: use shared buffers    = %s\n", __func__, ctx_dev->use_shared_buffers          ? "true" : "false"); | 
|  | 1116 | +    GGML_LOG_INFO("%s: use graph optimize    = %s\n", __func__, ctx_dev->use_graph_optimize          ? "true" : "false"); | 
| 1108 | 1117 |     GGML_LOG_INFO("%s: hasUnifiedMemory      = %s\n", __func__, ctx_dev->mtl_device.hasUnifiedMemory ? "true" : "false"); | 
| 1109 | 1118 | 
 | 
| 1110 | 1119 |     ctx->capture_next_compute = false; | 
| @@ -6726,11 +6735,13 @@ static enum ggml_status ggml_backend_metal_graph_compute(ggml_backend_t backend, | 
| 6726 | 6735 | } | 
| 6727 | 6736 | 
 | 
| 6728 | 6737 | static void ggml_backend_metal_graph_optimize(ggml_backend_t backend, struct ggml_cgraph * cgraph) { | 
| 6729 |  | -    GGML_UNUSED(backend); | 
|  | 6738 | +    struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; | 
| 6730 | 6739 | 
 | 
| 6731 | 6740 |     //const int64_t t_start = ggml_time_us(); | 
| 6732 | 6741 | 
 | 
| 6733 |  | -    ggml_metal_graph_optimize(cgraph); | 
|  | 6742 | +    if (ctx_dev->use_graph_optimize) { | 
|  | 6743 | +        ggml_metal_graph_optimize(cgraph); | 
|  | 6744 | +    } | 
| 6734 | 6745 | 
 | 
| 6735 | 6746 |     //printf("%s: initial graph optimize took %.3f ms\n", __func__, (ggml_time_us() - t_start) / 1000.0); | 
| 6736 | 6747 | } | 
|  | 
0 commit comments