Skip to content

Commit bfbaa4d

Browse files
authored
whisper : make large version explicit + fix data size units (ggml-org#1493)
1 parent 1d79e78 commit bfbaa4d

16 files changed

+69
-69
lines changed

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -418,9 +418,9 @@ samples:
418418
.PHONY: medium
419419
.PHONY: large-v1
420420
.PHONY: large-v2
421-
.PHONY: large
421+
.PHONY: large-v3
422422

423-
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large: main
423+
tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
424424
bash ./models/download-ggml-model.sh $@
425425
@echo ""
426426
@echo "==============================================="

README.md

+8-8
Original file line numberDiff line numberDiff line change
@@ -231,18 +231,18 @@ make medium.en
231231
make medium
232232
make large-v1
233233
make large-v2
234-
make large
234+
make large-v3
235235
```
236236

237237
## Memory usage
238238

239-
| Model | Disk | Mem | SHA |
240-
| --- | --- | --- | --- |
241-
| tiny | 75 MB | ~125 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
242-
| base | 142 MB | ~210 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
243-
| small | 466 MB | ~600 MB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
244-
| medium | 1.5 GB | ~1.7 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
245-
| large | 2.9 GB | ~3.3 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
239+
| Model | Disk | Mem |
240+
| --- | --- | --- |
241+
| tiny | 75 MiB | ~273 MB |
242+
| base | 142 MiB | ~388 MB |
243+
| small | 466 MiB | ~852 MB |
244+
| medium | 1.5 GiB | ~2.1 GB |
245+
| large | 2.9 GiB | ~3.9 GB |
246246

247247
## Quantization
248248

bindings/go/examples/go-model-download/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ const (
2424

2525
var (
2626
// The models which will be downloaded, if no model is specified as an argument
27-
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large"}
27+
modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
2828
)
2929

3030
var (

examples/livestream.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ if [ -n "$3" ]; then
4848
fi
4949

5050
# Whisper models
51-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
51+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
5252

5353
# list available models
5454
function list_models {

examples/twitch.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ help()
2121
echo "Usage: ./twitch.sh -s [step] -m [model] -t [threads] [url]"
2222
echo "options:"
2323
echo "-s Step in seconds (default is $step)."
24-
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large' (default is '$model')."
24+
echo "-m Choose model, options are: 'tiny.en' 'tiny' 'base.en' 'base' 'small.en' 'small' 'medium.en' 'medium' 'large-v1' 'large-v2' 'large-v3' (default is '$model')."
2525
echo "-t Number of threads to use."
2626
echo "-h Print this help page."
2727
echo

extra/convert-all.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/bash
22

3-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
3+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
44

55
for model in "${models[@]}"; do
66
python3 models/convert-pt-to-ggml.py ~/.cache/whisper/$model.pt ../whisper models/

ggml-metal.m

+9-9
Original file line numberDiff line numberDiff line change
@@ -346,9 +346,9 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
346346
}
347347

348348
GGML_METAL_LOG_INFO("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
349-
GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
349+
GGML_METAL_LOG_INFO("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1e6);
350350
if (ctx->device.maxTransferRate != 0) {
351-
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
351+
GGML_METAL_LOG_INFO("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1e6);
352352
} else {
353353
GGML_METAL_LOG_INFO("%s: maxTransferRate = built-in GPU\n", __func__);
354354
}
@@ -541,11 +541,11 @@ bool ggml_metal_add_buffer(
541541
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:size_aligned options:MTLResourceStorageModeShared deallocator:nil];
542542

543543
if (ctx->buffers[ctx->n_buffers].metal == nil) {
544-
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1024.0 / 1024.0);
544+
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_aligned / 1e6);
545545
return false;
546546
}
547547

548-
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
548+
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1e6);
549549

550550
++ctx->n_buffers;
551551
} else {
@@ -565,11 +565,11 @@ bool ggml_metal_add_buffer(
565565
ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:(void *) ((uint8_t *) data + i) length:size_step_aligned options:MTLResourceStorageModeShared deallocator:nil];
566566

567567
if (ctx->buffers[ctx->n_buffers].metal == nil) {
568-
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1024.0 / 1024.0);
568+
GGML_METAL_LOG_ERROR("%s: error: failed to allocate '%-16s' buffer, size = %8.2f MB\n", __func__, name, size_step_aligned / 1e6);
569569
return false;
570570
}
571571

572-
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
572+
GGML_METAL_LOG_INFO("%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1e6, i);
573573
if (i + size_step < size) {
574574
GGML_METAL_LOG_INFO("\n");
575575
}
@@ -580,16 +580,16 @@ bool ggml_metal_add_buffer(
580580

581581
#if TARGET_OS_OSX
582582
GGML_METAL_LOG_INFO(", (%8.2f / %8.2f)",
583-
ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
584-
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
583+
ctx->device.currentAllocatedSize / 1e6,
584+
ctx->device.recommendedMaxWorkingSetSize / 1e6);
585585

586586
if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
587587
GGML_METAL_LOG_WARN("%s: warning: current allocated size is greater than the recommended max working set size\n", __func__);
588588
} else {
589589
GGML_METAL_LOG_INFO("\n");
590590
}
591591
#else
592-
GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1024.0 / 1024.0);
592+
GGML_METAL_LOG_INFO(", (%8.2f)\n", ctx->device.currentAllocatedSize / 1e6);
593593
#endif
594594
}
595595

models/README.md

+13-13
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ https://huggingface.co/ggerganov/whisper.cpp/tree/main
3939

4040
## Available models
4141

42-
| Model | Disk | Mem | SHA |
43-
| --- | --- | --- | --- |
44-
| tiny | 75 MB | ~390 MB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
45-
| tiny.en | 75 MB | ~390 MB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
46-
| base | 142 MB | ~500 MB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
47-
| base.en | 142 MB | ~500 MB | `137c40403d78fd54d454da0f9bd998f78703390c` |
48-
| small | 466 MB | ~1.0 GB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
49-
| small.en | 466 MB | ~1.0 GB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
50-
| medium | 1.5 GB | ~2.6 GB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
51-
| medium.en | 1.5 GB | ~2.6 GB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
52-
| large-v1 | 2.9 GB | ~4.7 GB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
53-
| large-v2 | 2.9 GB | ~4.7 GB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
54-
| large | 2.9 GB | ~4.7 GB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
42+
| Model | Disk | SHA |
43+
| --- | --- | --- |
44+
| tiny | 75 MiB | `bd577a113a864445d4c299885e0cb97d4ba92b5f` |
45+
| tiny.en | 75 MiB | `c78c86eb1a8faa21b369bcd33207cc90d64ae9df` |
46+
| base | 142 MiB | `465707469ff3a37a2b9b8d8f89f2f99de7299dac` |
47+
| base.en | 142 MiB | `137c40403d78fd54d454da0f9bd998f78703390c` |
48+
| small | 466 MiB | `55356645c2b361a969dfd0ef2c5a50d530afd8d5` |
49+
| small.en | 466 MiB | `db8a495a91d927739e50b3fc1cc4c6b8f6c2d022` |
50+
| medium | 1.5 GiB | `fd9727b6e1217c2f614f9b698455c4ffd82463b4` |
51+
| medium.en | 1.5 GiB | `8c30f0e44ce9560643ebd10bbe50cd20eafd3723` |
52+
| large-v1 | 2.9 GiB | `b1caaf735c4cc1429223d5a74f0f4d0b9b59a299` |
53+
| large-v2 | 2.9 GiB | `0f4c8e34f21cf1a914c59d8b3ce882345ad349d6` |
54+
| large-v3 | 2.9 GiB | `ad82bf6a9043ceed055076d0fd39f5f186ff8062` |
5555

5656
## Model files for testing purposes
5757

models/convert-h5-to-coreml.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,14 @@ def convert_hf_whisper(hf_model_name_or_path: str, whisper_state_path: str):
7878
# Ported from models/convert-whisper-to-coreml.py
7979
if __name__ == "__main__":
8080
parser = argparse.ArgumentParser()
81-
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
81+
parser.add_argument("--model-name", type=str, help="name of model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
8282
parser.add_argument("--model-path", type=str, help="path to the model (e.g. if published on HuggingFace: Oblivion208/whisper-tiny-cantonese)", required=True)
8383
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
8484
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
8585
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
8686
args = parser.parse_args()
8787

88-
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
88+
if args.model_name not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
8989
raise ValueError("Invalid model name")
9090

9191
pt_target_path = f"models/hf-{args.model_name}.pt"

models/convert-whisper-to-coreml.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -296,13 +296,13 @@ def convert_decoder(hparams, model, quantize=False):
296296

297297
if __name__ == "__main__":
298298
parser = argparse.ArgumentParser()
299-
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
299+
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
300300
parser.add_argument("--encoder-only", type=bool, help="only convert encoder", default=False)
301301
parser.add_argument("--quantize", type=bool, help="quantize weights to F16", default=False)
302302
parser.add_argument("--optimize-ane", type=bool, help="optimize for ANE execution (currently broken)", default=False)
303303
args = parser.parse_args()
304304

305-
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large", "large-v1", "large-v2"]:
305+
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "small.en-tdrz", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
306306
raise ValueError("Invalid model name")
307307

308308
whisper = load_model(args.model).cpu()

models/convert-whisper-to-openvino.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ def convert_encoder(hparams, encoder, mname):
3838

3939
if __name__ == "__main__":
4040
parser = argparse.ArgumentParser()
41-
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large, large-v1, large-v2)", required=True)
41+
parser.add_argument("--model", type=str, help="model to convert (e.g. tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3)", required=True)
4242
args = parser.parse_args()
4343

44-
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large", "large-v1", "large-v2"]:
44+
if args.model not in ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"]:
4545
raise ValueError("Invalid model name")
4646

4747
whisper = load_model(args.model).cpu()

models/download-coreml-model.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ function get_script_path() {
1919
models_path="$(get_script_path)"
2020

2121
# Whisper models
22-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
22+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
2323

2424
# list available models
2525
function list_models {

models/download-ggml-model.cmd

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ popd
88
set argc=0
99
for %%x in (%*) do set /A argc+=1
1010

11-
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large
11+
set models=tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3
1212

1313
if %argc% neq 1 (
1414
echo.

models/download-ggml-model.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ function get_script_path() {
2222
models_path="$(get_script_path)"
2323

2424
# Whisper models
25-
models=(
25+
models=(
2626
"tiny.en"
2727
"tiny"
2828
"tiny-q5_1"
@@ -42,7 +42,7 @@ models=(
4242
"medium.en-q5_0"
4343
"large-v1"
4444
"large-v2"
45-
"large"
45+
"large-v3"
4646
"large-q5_0"
4747
)
4848

tests/run-tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
cd `dirname $0`
2020

2121
# Whisper models
22-
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large" )
22+
models=( "tiny.en" "tiny" "base.en" "base" "small.en" "small" "medium.en" "medium" "large-v1" "large-v2" "large-v3" )
2323

2424
# list available models
2525
function list_models {

0 commit comments

Comments (0)