Skip to content

Commit 454d97d

Browse files
committed
whisper : add integer quantization support
1 parent 0ccd674 commit 454d97d

File tree

5 files changed

+238
-96
lines changed

5 files changed

+238
-96
lines changed

examples/helpers.js

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,15 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
145145
var db = event.target.result;
146146
var tx = db.transaction(['models'], 'readwrite');
147147
var os = tx.objectStore('models');
148-
var rq = os.put(data, url);
148+
149+
var rq = null;
150+
try {
151+
var rq = os.put(data, url);
152+
} catch (e) {
153+
cbPrint('loadRemote: failed to store "' + url + '" in the IndexedDB: \n' + e);
154+
cbCancel();
155+
return;
156+
}
149157

150158
rq.onsuccess = function (event) {
151159
cbPrint('loadRemote: "' + url + '" stored in the IndexedDB');
@@ -180,7 +188,6 @@ function loadRemote(url, dst, size_mb, cbProgress, cbReady, cbCancel, cbPrint) {
180188

181189
rq.onabort = function (event) {
182190
cbPrint('loadRemote: failed to open IndexedDB: abort');
183-
191+
cbCancel();
184192
};
185193
}
186-

examples/whisper.wasm/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ endif()
3131
set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
3232
--bind \
3333
-s USE_PTHREADS=1 \
34-
-s PTHREAD_POOL_SIZE=8 \
35-
-s INITIAL_MEMORY=1500MB \
36-
-s TOTAL_MEMORY=1500MB \
34+
-s PTHREAD_POOL_SIZE_STRICT=0 \
35+
-s INITIAL_MEMORY=2000MB \
36+
-s TOTAL_MEMORY=2000MB \
3737
-s FORCE_FILESYSTEM=1 \
3838
-s EXPORTED_RUNTIME_METHODS=\"['print', 'printErr', 'ccall', 'cwrap']\" \
3939
${EXTRA_FLAGS} \

examples/whisper.wasm/emscripten.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,12 @@ std::thread g_worker;
1010

1111
std::vector<struct whisper_context *> g_contexts(4, nullptr);
1212

13+
// Returns the largest power of two that is less than or equal to n.
//
//   n - upper bound, e.g. the number of hardware threads reported by the platform
//
// For n < 1 no such power of two exists; 1 is returned in that case so the result can
// always be used directly as a thread count (std::thread::hardware_concurrency() is
// allowed to report 0 when the value is not computable).
static inline int mpow2(int n) {
    if (n < 1) {
        return 1;
    }

    int p = 1;
    // compare against n/2 instead of doubling past n: p never exceeds n, so the
    // multiplication cannot overflow even when n is close to INT_MAX
    while (p <= n/2) {
        p *= 2;
    }

    return p;
}
18+
1319
EMSCRIPTEN_BINDINGS(whisper) {
1420
emscripten::function("init", emscripten::optional_override([](const std::string & path_model) {
1521
if (g_worker.joinable()) {
@@ -43,7 +49,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
4349
}
4450
}));
4551

46-
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, bool translate) {
52+
emscripten::function("full_default", emscripten::optional_override([](size_t index, const emscripten::val & audio, const std::string & lang, int nthreads, bool translate) {
4753
if (g_worker.joinable()) {
4854
g_worker.join();
4955
}
@@ -66,7 +72,7 @@ EMSCRIPTEN_BINDINGS(whisper) {
6672
params.print_special = false;
6773
params.translate = translate;
6874
params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en";
69-
params.n_threads = std::min(8, (int) std::thread::hardware_concurrency());
75+
params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency())));
7076
params.offset_ms = 0;
7177

7278
std::vector<float> pcmf32;

examples/whisper.wasm/index-tmpl.html

Lines changed: 89 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,34 @@
4040

4141
Note that the computation is quite heavy and may take a few seconds to complete.<br>
4242
The transcription results will be displayed in the text area below.<br><br>
43-
<b>Important: your browser must support WASM SIMD instructions for this to work.</b>
43+
<b>Important:</b>
44+
<ul>
45+
<li>your browser must support WASM SIMD instructions for this to work</li>
46+
<li>quantized models are still in experimental stage (<a href="https://github.com/ggerganov/ggml/pull/27">more info</a>)</li>
47+
<li>Firefox cannot load files larger than 256 MB - use Chrome instead</li>
48+
</ul>
4449

45-
<br><br><hr>
50+
<hr>
4651

4752
<div id="model">
48-
Whisper model: <span id="model-whisper-status"></span>
53+
Whisper models: <span id="model-whisper-status"></span><br><br>
4954
<button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button>
5055
<button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button>
5156
<button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button>
5257
<button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button>
5358
<button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button>
5459
<button id="fetch-whisper-small" onclick="loadWhisper('small')">small (466 MB)</button>
55-
<span id="fetch-whisper-progress"></span>
56-
5760
<input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" />
61+
<br><br>
62+
Quantized models:<br><br>
63+
<button id="fetch-whisper-base-en-q4_0" onclick="loadWhisper('base-en-q4_0')">base.en (4bit, 49 MB)</button>
64+
<button id="fetch-whisper-base-q4_0" onclick="loadWhisper('base-q4_0')">base (4bit, 49 MB)</button>
65+
<button id="fetch-whisper-small-en-q4_0" onclick="loadWhisper('small-en-q4_0')">small.en (4bit, 152 MB)</button>
66+
<button id="fetch-whisper-small-q4_0" onclick="loadWhisper('small-q4_0')">small (4bit, 152 MB)</button><br>
67+
<button id="fetch-whisper-medium-en-q4_0" onclick="loadWhisper('medium-en-q4_0')">medium.en (4bit, 469 MB)</button>
68+
<button id="fetch-whisper-medium-q4_0" onclick="loadWhisper('medium-q4_0')">medium (4bit, 469 MB)</button>
69+
<button id="fetch-whisper-large-q4_0" onclick="loadWhisper('large-q4_0')">large (4bit, 985 MB)</button>
70+
<span id="fetch-whisper-progress"></span>
5871
</div>
5972

6073
<br>
@@ -161,6 +174,12 @@
161174
<option value="yi">Yiddish</option>
162175
</select>
163176
</td>
177+
<!-- Slider to select number of threads between 1 and 16 -->
178+
<td>
179+
Threads:
180+
<input type="range" id="threads" name="threads" min="1" max="16" value="8" onchange="changeThreads(this.value)" />
181+
<span id="threads-value">8</span>
182+
</td>
164183
<td>
165184
<button onclick="onProcess(false);">Transcribe</button>
166185
</td>
@@ -263,11 +282,13 @@
263282

264283
Module.FS_createDataFile("/", fname, buf, true, true);
265284

266-
model_whisper = fname;
285+
//model_whisper = fname;
267286

268287
document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!';
269288

270289
printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length);
290+
291+
document.getElementById('model').innerHTML = 'Model fetched: ' + model_whisper;
271292
}
272293

273294
function loadFile(event, fname) {
@@ -292,6 +313,15 @@
292313
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
293314
document.getElementById('fetch-whisper-base' ).style.display = 'none';
294315
document.getElementById('fetch-whisper-small' ).style.display = 'none';
316+
317+
document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
318+
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
319+
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
320+
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
321+
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
322+
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
323+
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';
324+
295325
document.getElementById('whisper-file' ).style.display = 'none';
296326
document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name;
297327
}
@@ -304,6 +334,14 @@
304334
'base': 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin',
305335
'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin',
306336
'small': 'https://whisper.ggerganov.com/ggml-model-whisper-small.bin',
337+
338+
'base-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q4_0.bin',
339+
'base-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base-q4_0.bin',
340+
'small-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en-q4_0.bin',
341+
'small-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small-q4_0.bin',
342+
'medium-en-q4_0':'https://whisper.ggerganov.com/ggml-model-whisper-medium.en-q4_0.bin',
343+
'medium-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-medium-q4_0.bin',
344+
'large-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-large-q4_0.bin',
307345
};
308346

309347
let sizes = {
@@ -313,6 +351,14 @@
313351
'base': 142,
314352
'small.en': 466,
315353
'small': 466,
354+
355+
'base-en-q4_0': 49,
356+
'base-q4_0': 49,
357+
'small-en-q4_0': 152,
358+
'small-q4_0': 152,
359+
'medium-en-q4_0': 469,
360+
'medium-q4_0': 469,
361+
'large-q4_0': 985,
316362
};
317363

318364
let url = urls[model];
@@ -327,6 +373,15 @@
327373
document.getElementById('fetch-whisper-tiny' ).style.display = 'none';
328374
document.getElementById('fetch-whisper-base' ).style.display = 'none';
329375
document.getElementById('fetch-whisper-small' ).style.display = 'none';
376+
377+
document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none';
378+
document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none';
379+
document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none';
380+
document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none';
381+
document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none';
382+
document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none';
383+
document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none';
384+
330385
document.getElementById('whisper-file' ).style.display = 'none';
331386
document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model;
332387

@@ -337,12 +392,22 @@
337392

338393
cbCancel = function() {
339394
var el;
395+
340396
el = document.getElementById('fetch-whisper-tiny-en' ); if (el) el.style.display = 'inline-block';
341397
el = document.getElementById('fetch-whisper-base-en' ); if (el) el.style.display = 'inline-block';
342398
el = document.getElementById('fetch-whisper-small-en'); if (el) el.style.display = 'inline-block';
343399
el = document.getElementById('fetch-whisper-tiny' ); if (el) el.style.display = 'inline-block';
344400
el = document.getElementById('fetch-whisper-base' ); if (el) el.style.display = 'inline-block';
345401
el = document.getElementById('fetch-whisper-small' ); if (el) el.style.display = 'inline-block';
402+
403+
el = document.getElementById('fetch-whisper-base-en-q4_0' ); if (el) el.style.display = 'inline-block';
404+
el = document.getElementById('fetch-whisper-base-q4_0' ); if (el) el.style.display = 'inline-block';
405+
el = document.getElementById('fetch-whisper-small-en-q4_0' ); if (el) el.style.display = 'inline-block';
406+
el = document.getElementById('fetch-whisper-small-q4_0' ); if (el) el.style.display = 'inline-block';
407+
el = document.getElementById('fetch-whisper-medium-en-q4_0'); if (el) el.style.display = 'inline-block';
408+
el = document.getElementById('fetch-whisper-medium-q4_0' ); if (el) el.style.display = 'inline-block';
409+
el = document.getElementById('fetch-whisper-large-q4_0' ); if (el) el.style.display = 'inline-block';
410+
346411
el = document.getElementById('whisper-file' ); if (el) el.style.display = 'inline-block';
347412
el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = '';
348413
};
@@ -354,7 +419,8 @@
354419
// audio file
355420
//
356421

357-
const kMaxAudio_s = 120;
422+
const kMaxAudio_s = 30*60;
423+
const kMaxRecording_s = 2*60;
358424
const kSampleRate = 16000;
359425

360426
window.AudioContext = window.AudioContext || window.webkitAudioContext;
@@ -423,7 +489,7 @@
423489
doRecording = false;
424490
}
425491

426-
// record up to kMaxAudio_s seconds of audio from the microphone
492+
// record up to kMaxRecording_s seconds of audio from the microphone
427493
// check if doRecording is false every 1000 ms and stop recording if so
428494
// update progress information
429495
function startRecording() {
@@ -479,9 +545,9 @@
479545
printTextarea('js: audio recorded, size: ' + audio.length);
480546

481547
// truncate to the first kMaxRecording_s seconds
482-
if (audio.length > kMaxAudio_s*kSampleRate) {
483-
audio = audio.slice(0, kMaxAudio_s*kSampleRate);
484-
printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds');
548+
if (audio.length > kMaxRecording_s*kSampleRate) {
549+
audio = audio.slice(0, kMaxRecording_s*kSampleRate);
550+
printTextarea('js: truncated audio to first ' + kMaxRecording_s + ' seconds');
485551
}
486552
setAudio(audio);
487553
});
@@ -509,24 +575,31 @@
509575
});
510576
}
511577

512-
document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%';
513-
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%';
578+
document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxRecording_s) + '%';
579+
document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxRecording_s).toFixed(0) + '%';
514580
}, 1000);
515581

516582
printTextarea('js: recording ...');
517583

518584
setTimeout(function() {
519585
if (doRecording) {
520-
printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds');
586+
printTextarea('js: recording stopped after ' + kMaxRecording_s + ' seconds');
521587
stopRecording();
522588
}
523-
}, kMaxAudio_s*1000);
589+
}, kMaxRecording_s*1000);
524590
}
525591

526592
//
527593
// transcribe
528594
//
529595

596+
var nthreads = 8;
597+
598+
// Slider callback: remember the requested number of threads and mirror it in the UI.
//
//   value - current value of the "threads" range input; the DOM hands it over as a
//           string (e.g. "8")
function changeThreads(value) {
    // nthreads is forwarded to Module.full_default(), whose thread-count parameter is
    // declared as an int - store a real number instead of the raw input string
    var parsed = parseInt(value, 10);
    if (isNaN(parsed)) {
        // keep the previous setting rather than forwarding a non-numeric value
        return;
    }

    nthreads = parsed;
    document.getElementById('threads-value').innerHTML = nthreads;
}
602+
530603
function onProcess(translate) {
531604
if (!instance) {
532605
instance = Module.init('whisper.bin');
@@ -553,7 +626,7 @@
553626
printTextarea('');
554627

555628
setTimeout(function() {
556-
var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate);
629+
var ret = Module.full_default(instance, audio, document.getElementById('language').value, nthreads, translate);
557630
console.log('js: full_default returned: ' + ret);
558631
if (ret) {
559632
printTextarea("js: whisper returned: " + ret);

0 commit comments

Comments
 (0)