
Commit b38f8a4

whisper : add description of ggml_mul_mat_pad
1 parent f365543 commit b38f8a4

File tree

  coreml/whisper-encoder.mm
  whisper.cpp

2 files changed: +15 -2 lines changed

coreml/whisper-encoder.mm

Lines changed: 2 additions & 2 deletions

@@ -24,8 +24,8 @@
 
     // select which device to run the Core ML model on
     MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
-    //config.computeUnits = MLComputeUnitsCPUAndGPU;
-    config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
+    config.computeUnits = MLComputeUnitsCPUAndGPU;
+    //config.computeUnits = MLComputeUnitsCPUAndNeuralEngine;
     //config.computeUnits = MLComputeUnitsAll;
 
     const void * data = CFBridgingRetain([[whisper_encoder_impl alloc] initWithContentsOfURL:url_model configuration:config error:nil]);

whisper.cpp

Lines changed: 13 additions & 0 deletions

@@ -136,6 +136,19 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
     ggml_graph_compute(graph, &plan);
 }
 
+// faster matrix multiplications for tensors that do not have dimension 0 divisible by "pad"
+// the idea is to represent the original matrix multiplication:
+//
+//   Z = X @ Y
+//
+// with two matrix multiplications:
+//
+//   Z = [X_0; X_1] @ [Y_0; Y_1]
+//
+// here X_0 and Y_0 are views of X and Y that have dimension 0 divisible by "pad"
+// and X_1 and Y_1 are the remaining views. X_1 and Y_1 end up being small matrices that can be processed with more
+// general-purpose kernels
+//
 static struct ggml_tensor * ggml_mul_mat_pad(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
 //#if !defined(GGML_USE_METAL)
 //    return ggml_mul_mat(ctx, x, y);
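
For reference, below is a minimal sketch of how the split described in the new comment can be expressed with ggml's public ops (ggml_view_3d, ggml_mul_mat, ggml_add). It illustrates the technique only and is not the code from this commit: the helper name mul_mat_pad_sketch and the simple early-exit check are assumptions, and the actual ggml_mul_mat_pad in whisper.cpp may apply extra heuristics before deciding to split.

#include "ggml.h"

// sketch (compiled as C++ because of the default argument): split x and y along
// dimension 0 into a pad-aligned view and a small remainder view, multiply the
// pairs separately and sum the two products
static struct ggml_tensor * mul_mat_pad_sketch(struct ggml_context * ctx, struct ggml_tensor * x, struct ggml_tensor * y, int pad = 32) {
    // dimension 0 is already a multiple of "pad" - a single multiplication suffices
    if (x->ne[0] % pad == 0) {
        return ggml_mul_mat(ctx, x, y);
    }

    const int64_t n0 = (x->ne[0]/pad)*pad; // pad-aligned part of dimension 0

    // X_0, Y_0: views over the first n0 elements of dimension 0
    struct ggml_tensor * x_0 = ggml_view_3d(ctx, x, n0, x->ne[1], x->ne[2], x->nb[1], x->nb[2], 0);
    struct ggml_tensor * y_0 = ggml_view_3d(ctx, y, n0, y->ne[1], y->ne[2], y->nb[1], y->nb[2], 0);

    // X_1, Y_1: views over the remaining ne[0] - n0 elements (byte offset n0*nb[0])
    struct ggml_tensor * x_1 = ggml_view_3d(ctx, x, x->ne[0] - n0, x->ne[1], x->ne[2], x->nb[1], x->nb[2], n0*x->nb[0]);
    struct ggml_tensor * y_1 = ggml_view_3d(ctx, y, y->ne[0] - n0, y->ne[1], y->ne[2], y->nb[1], y->nb[2], n0*y->nb[0]);

    // Z = X_0 @ Y_0 + X_1 @ Y_1
    return ggml_add(ctx,
            ggml_mul_mat(ctx, x_0, y_0),
            ggml_mul_mat(ctx, x_1, y_1));
}

The sum at the end works because ggml_mul_mat contracts over dimension 0, so the split distributes over the product: the pad-aligned part can take the fast path (e.g. the Metal matrix kernels) while the small remainder is handled by the general-purpose kernels, as the comment describes.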
