|
40 | 40 |
|
41 | 41 | Note that the computation is quite heavy and may take a few seconds to complete.<br> |
42 | 42 | The transcription results will be displayed in the text area below.<br><br> |
43 | | - <b>Important: your browser must support WASM SIMD instructions for this to work.</b> |
| 43 | + <b>Important:</b> |
| 44 | + <ul> |
| 45 | + <li>your browser must support WASM SIMD instructions for this to work</li> |
| 46 | + <li>quantized models are still in an experimental stage (<a href="https://github.com/ggerganov/ggml/pull/27">more info</a>)</li> |
| 47 | + <li>Firefox cannot load files larger than 256 MB - use Chrome instead</li> |
| 48 | + </ul> |
44 | 49 |
|
45 | | - <br><br><hr> |
| 50 | + <hr> |
46 | 51 |
|
47 | 52 | <div id="model"> |
48 | | - Whisper model: <span id="model-whisper-status"></span> |
| 53 | + Whisper models: <span id="model-whisper-status"></span><br><br> |
49 | 54 | <button id="fetch-whisper-tiny-en" onclick="loadWhisper('tiny.en')">tiny.en (75 MB)</button> |
50 | 55 | <button id="fetch-whisper-tiny" onclick="loadWhisper('tiny')">tiny (75 MB)</button> |
51 | 56 | <button id="fetch-whisper-base-en" onclick="loadWhisper('base.en')">base.en (142 MB)</button> |
52 | 57 | <button id="fetch-whisper-base" onclick="loadWhisper('base')">base (142 MB)</button> |
53 | 58 | <button id="fetch-whisper-small-en" onclick="loadWhisper('small.en')">small.en (466 MB)</button> |
54 | 59 | <button id="fetch-whisper-small" onclick="loadWhisper('small')">small (466 MB)</button> |
55 | | - <span id="fetch-whisper-progress"></span> |
56 | | - |
57 | 60 | <input type="file" id="whisper-file" name="file" onchange="loadFile(event, 'whisper.bin')" /> |
| 61 | + <br><br> |
| 62 | + Quantized models:<br><br> |
| 63 | + <button id="fetch-whisper-base-en-q4_0" onclick="loadWhisper('base-en-q4_0')">base.en (4bit, 49 MB)</button> |
| 64 | + <button id="fetch-whisper-base-q4_0" onclick="loadWhisper('base-q4_0')">base (4bit, 49 MB)</button> |
| 65 | + <button id="fetch-whisper-small-en-q4_0" onclick="loadWhisper('small-en-q4_0')">small.en (4bit, 152 MB)</button> |
| 66 | + <button id="fetch-whisper-small-q4_0" onclick="loadWhisper('small-q4_0')">small (4bit, 152 MB)</button><br> |
| 67 | + <button id="fetch-whisper-medium-en-q4_0" onclick="loadWhisper('medium-en-q4_0')">medium.en (4bit, 469 MB)</button> |
| 68 | + <button id="fetch-whisper-medium-q4_0" onclick="loadWhisper('medium-q4_0')">medium (4bit, 469 MB)</button> |
| 69 | + <button id="fetch-whisper-large-q4_0" onclick="loadWhisper('large-q4_0')">large (4bit, 985 MB)</button> |
| 70 | + <span id="fetch-whisper-progress"></span> |
58 | 71 | </div> |
59 | 72 |
|
60 | 73 | <br> |
|
161 | 174 | <option value="yi">Yiddish</option> |
162 | 175 | </select> |
163 | 176 | </td> |
| 177 | + <!-- Slider to select number of threads between 1 and 16 --> |
| 178 | + <td> |
| 179 | + Threads: |
| 180 | + <input type="range" id="threads" name="threads" min="1" max="16" value="8" onchange="changeThreads(this.value)" /> |
| 181 | + <span id="threads-value">8</span> |
| 182 | + </td> |
164 | 183 | <td> |
165 | 184 | <button onclick="onProcess(false);">Transcribe</button> |
166 | 185 | </td> |
|
263 | 282 |
|
264 | 283 | Module.FS_createDataFile("/", fname, buf, true, true); |
265 | 284 |
|
266 | | - model_whisper = fname; |
| 285 | + //model_whisper = fname; |
267 | 286 |
|
268 | 287 | document.getElementById('model-whisper-status').innerHTML = 'loaded "' + model_whisper + '"!'; |
269 | 288 |
|
270 | 289 | printTextarea('storeFS: stored model: ' + fname + ' size: ' + buf.length); |
| 290 | + |
| 291 | + document.getElementById('model').innerHTML = 'Model fetched: ' + model_whisper; |
271 | 292 | } |
272 | 293 |
|
273 | 294 | function loadFile(event, fname) { |
|
292 | 313 | document.getElementById('fetch-whisper-tiny' ).style.display = 'none'; |
293 | 314 | document.getElementById('fetch-whisper-base' ).style.display = 'none'; |
294 | 315 | document.getElementById('fetch-whisper-small' ).style.display = 'none'; |
| 316 | + |
| 317 | + document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none'; |
| 318 | + document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none'; |
| 319 | + document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none'; |
| 320 | + document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none'; |
| 321 | + document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none'; |
| 322 | + document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none'; |
| 323 | + document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none'; |
| 324 | + |
295 | 325 | document.getElementById('whisper-file' ).style.display = 'none'; |
296 | 326 | document.getElementById('model-whisper-status' ).innerHTML = 'loaded model: ' + file.name; |
297 | 327 | } |
|
304 | 334 | 'base': 'https://whisper.ggerganov.com/ggml-model-whisper-base.bin', |
305 | 335 | 'small.en': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en.bin', |
306 | 336 | 'small': 'https://whisper.ggerganov.com/ggml-model-whisper-small.bin', |
| 337 | + |
| 338 | + 'base-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base.en-q4_0.bin', |
| 339 | + 'base-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-base-q4_0.bin', |
| 340 | + 'small-en-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small.en-q4_0.bin', |
| 341 | + 'small-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-small-q4_0.bin', |
| 342 | + 'medium-en-q4_0':'https://whisper.ggerganov.com/ggml-model-whisper-medium.en-q4_0.bin', |
| 343 | + 'medium-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-medium-q4_0.bin', |
| 344 | + 'large-q4_0': 'https://whisper.ggerganov.com/ggml-model-whisper-large-q4_0.bin', |
307 | 345 | }; |
308 | 346 |
|
309 | 347 | let sizes = { |
|
313 | 351 | 'base': 142, |
314 | 352 | 'small.en': 466, |
315 | 353 | 'small': 466, |
| 354 | + |
| 355 | + 'base-en-q4_0': 49, |
| 356 | + 'base-q4_0': 49, |
| 357 | + 'small-en-q4_0': 152, |
| 358 | + 'small-q4_0': 152, |
| 359 | + 'medium-en-q4_0': 469, |
| 360 | + 'medium-q4_0': 469, |
| 361 | + 'large-q4_0': 985, |
316 | 362 | }; |
317 | 363 |
|
318 | 364 | let url = urls[model]; |
|
327 | 373 | document.getElementById('fetch-whisper-tiny' ).style.display = 'none'; |
328 | 374 | document.getElementById('fetch-whisper-base' ).style.display = 'none'; |
329 | 375 | document.getElementById('fetch-whisper-small' ).style.display = 'none'; |
| 376 | + |
| 377 | + document.getElementById('fetch-whisper-base-en-q4_0' ).style.display = 'none'; |
| 378 | + document.getElementById('fetch-whisper-base-q4_0' ).style.display = 'none'; |
| 379 | + document.getElementById('fetch-whisper-small-en-q4_0' ).style.display = 'none'; |
| 380 | + document.getElementById('fetch-whisper-small-q4_0' ).style.display = 'none'; |
| 381 | + document.getElementById('fetch-whisper-medium-en-q4_0').style.display = 'none'; |
| 382 | + document.getElementById('fetch-whisper-medium-q4_0' ).style.display = 'none'; |
| 383 | + document.getElementById('fetch-whisper-large-q4_0' ).style.display = 'none'; |
| 384 | + |
330 | 385 | document.getElementById('whisper-file' ).style.display = 'none'; |
331 | 386 | document.getElementById('model-whisper-status' ).innerHTML = 'loading model: ' + model; |
332 | 387 |
|
|
337 | 392 |
|
338 | 393 | cbCancel = function() { |
339 | 394 | var el; |
| 395 | + |
340 | 396 | el = document.getElementById('fetch-whisper-tiny-en' ); if (el) el.style.display = 'inline-block'; |
341 | 397 | el = document.getElementById('fetch-whisper-base-en' ); if (el) el.style.display = 'inline-block'; |
342 | 398 | el = document.getElementById('fetch-whisper-small-en'); if (el) el.style.display = 'inline-block'; |
343 | 399 | el = document.getElementById('fetch-whisper-tiny' ); if (el) el.style.display = 'inline-block'; |
344 | 400 | el = document.getElementById('fetch-whisper-base' ); if (el) el.style.display = 'inline-block'; |
345 | 401 | el = document.getElementById('fetch-whisper-small' ); if (el) el.style.display = 'inline-block'; |
| 402 | + |
| 403 | + el = document.getElementById('fetch-whisper-base-en-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 404 | + el = document.getElementById('fetch-whisper-base-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 405 | + el = document.getElementById('fetch-whisper-small-en-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 406 | + el = document.getElementById('fetch-whisper-small-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 407 | + el = document.getElementById('fetch-whisper-medium-en-q4_0'); if (el) el.style.display = 'inline-block'; |
| 408 | + el = document.getElementById('fetch-whisper-medium-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 409 | + el = document.getElementById('fetch-whisper-large-q4_0' ); if (el) el.style.display = 'inline-block'; |
| 410 | + |
346 | 411 | el = document.getElementById('whisper-file' ); if (el) el.style.display = 'inline-block'; |
347 | 412 | el = document.getElementById('model-whisper-status' ); if (el) el.innerHTML = ''; |
348 | 413 | }; |
|
354 | 419 | // audio file |
355 | 420 | // |
356 | 421 |
|
357 | | - const kMaxAudio_s = 120; |
| 422 | + const kMaxAudio_s = 30*60; |
| 423 | + const kMaxRecording_s = 2*60; |
358 | 424 | const kSampleRate = 16000; |
359 | 425 |
|
360 | 426 | window.AudioContext = window.AudioContext || window.webkitAudioContext; |
|
423 | 489 | doRecording = false; |
424 | 490 | } |
425 | 491 |
|
426 | | - // record up to kMaxAudio_s seconds of audio from the microphone |
| 492 | + // record up to kMaxRecording_s seconds of audio from the microphone |
427 | 493 | // check if doRecording is false every 1000 ms and stop recording if so |
428 | 494 | // update progress information |
429 | 495 | function startRecording() { |
|
479 | 545 | printTextarea('js: audio recorded, size: ' + audio.length); |
480 | 546 |
|
481 | 547 | // truncate to the recording limit (kMaxRecording_s seconds)
482 | | - if (audio.length > kMaxAudio_s*kSampleRate) { |
483 | | - audio = audio.slice(0, kMaxAudio_s*kSampleRate); |
484 | | - printTextarea('js: truncated audio to first ' + kMaxAudio_s + ' seconds'); |
| 548 | + if (audio.length > kMaxRecording_s*kSampleRate) { |
| 549 | + audio = audio.slice(0, kMaxRecording_s*kSampleRate); |
| 550 | + printTextarea('js: truncated audio to first ' + kMaxRecording_s + ' seconds'); |
485 | 551 | } |
486 | 552 | setAudio(audio); |
487 | 553 | }); |
|
509 | 575 | }); |
510 | 576 | } |
511 | 577 |
|
512 | | - document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxAudio_s) + '%'; |
513 | | - document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxAudio_s).toFixed(0) + '%'; |
| 578 | + document.getElementById('progress-bar').style.width = (100*(Date.now() - startTime)/1000/kMaxRecording_s) + '%'; |
| 579 | + document.getElementById('progress-text').innerHTML = (100*(Date.now() - startTime)/1000/kMaxRecording_s).toFixed(0) + '%'; |
514 | 580 | }, 1000); |
515 | 581 |
|
516 | 582 | printTextarea('js: recording ...'); |
517 | 583 |
|
518 | 584 | setTimeout(function() { |
519 | 585 | if (doRecording) { |
520 | | - printTextarea('js: recording stopped after ' + kMaxAudio_s + ' seconds'); |
| 586 | + printTextarea('js: recording stopped after ' + kMaxRecording_s + ' seconds'); |
521 | 587 | stopRecording(); |
522 | 588 | } |
523 | | - }, kMaxAudio_s*1000); |
| 589 | + }, kMaxRecording_s*1000); |
524 | 590 | } |
525 | 591 |
|
526 | 592 | // |
527 | 593 | // transcribe |
528 | 594 | // |
529 | 595 |
|
| 596 | + var nthreads = 8; |
| 597 | + |
| 598 | + function changeThreads(value) { |
| 599 | + nthreads = value; |
| 600 | + document.getElementById('threads-value').innerHTML = nthreads; |
| 601 | + } |
| 602 | + |
530 | 603 | function onProcess(translate) { |
531 | 604 | if (!instance) { |
532 | 605 | instance = Module.init('whisper.bin'); |
|
553 | 626 | printTextarea(''); |
554 | 627 |
|
555 | 628 | setTimeout(function() { |
556 | | - var ret = Module.full_default(instance, audio, document.getElementById('language').value, translate); |
| 629 | + var ret = Module.full_default(instance, audio, document.getElementById('language').value, nthreads, translate); |
557 | 630 | console.log('js: full_default returned: ' + ret); |
558 | 631 | if (ret) { |
559 | 632 | printTextarea("js: whisper returned: " + ret); |
|
0 commit comments