Skip to content

Commit

Permalink
Merge pull request cogentapps#58 from tluyben/whisper-stt-api
Browse files Browse the repository at this point in the history
Whisper STT API
  • Loading branch information
cogentapps authored Mar 20, 2023
2 parents b6881c6 + 39e175b commit 95bf0ae
Show file tree
Hide file tree
Showing 11 changed files with 300 additions and 18 deletions.
2 changes: 2 additions & 0 deletions app/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"dependencies": {
"@auth0/auth0-spa-js": "^2.0.4",
"@emotion/css": "^11.10.6",
"@emotion/react": "^11.10.6",
"@emotion/styled": "^11.10.6",
"@mantine/core": "^5.10.5",
"@mantine/hooks": "^5.10.5",
Expand All @@ -20,6 +21,7 @@
"jshashes": "^1.0.8",
"localforage": "^1.10.0",
"match-sorter": "^6.3.1",
"mic-recorder-to-mp3": "^2.2.2",
"minisearch": "^6.0.1",
"natural": "^6.2.0",
"openai": "^3.2.1",
Expand Down
132 changes: 124 additions & 8 deletions app/src/components/input.tsx
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import styled from '@emotion/styled';
import { Button, ActionIcon, Textarea, Loader } from '@mantine/core';
import { useMediaQuery } from '@mantine/hooks';
import { useCallback, useMemo } from 'react';
import { useCallback, useMemo, useState } from 'react';
import { FormattedMessage, useIntl } from 'react-intl';
import { useLocation } from 'react-router-dom';
import { useAppContext } from '../context';
import { useAppDispatch, useAppSelector } from '../store';
import { selectMessage, setMessage } from '../store/message';
import { selectTemperature } from '../store/parameters';
import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
import { speechRecognition } from '../speech-recognition-types.d'
import MicRecorder from 'mic-recorder-to-mp3';
import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
import { Mp3Encoder } from 'lamejs';

const Container = styled.div`
background: #292933;
Expand All @@ -34,12 +38,54 @@ export interface MessageInputProps {
disabled?: boolean;
}



async function chunkAndEncodeMP3File(file: Blob): Promise<Array<File>> {
const MAX_CHUNK_SIZE = 25 * 1024 * 1024; // 25 MB
const audioContext = new AudioContext();
const audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer());
const duration = audioBuffer.duration;
const sampleRate = audioBuffer.sampleRate;
const numChannels = audioBuffer.numberOfChannels;
const bytesPerSample = 2; // 16-bit audio
const samplesPerChunk = Math.floor((MAX_CHUNK_SIZE / bytesPerSample) / numChannels);
const totalSamples = Math.floor(duration * sampleRate);
const numChunks = Math.ceil(totalSamples / samplesPerChunk);

const chunks: Array<File> = [];
for (let i = 0; i < numChunks; i++) {
const startSample = i * samplesPerChunk;
const endSample = Math.min(startSample + samplesPerChunk, totalSamples);
const chunkDuration = (endSample - startSample) / sampleRate;
const chunkBuffer = audioContext.createBuffer(numChannels, endSample - startSample, sampleRate);
for (let c = 0; c < numChannels; c++) {
const channelData = audioBuffer.getChannelData(c).subarray(startSample, endSample);
chunkBuffer.copyToChannel(channelData, c);
}
const chunkBlob = await new Promise<Blob>((resolve) => {
const encoder = new Mp3Encoder(numChannels, sampleRate, 128);
const leftData = chunkBuffer.getChannelData(0);
const rightData = numChannels === 1 ? leftData : chunkBuffer.getChannelData(1);
const mp3Data = encoder.encodeBuffer(leftData, rightData);
const blob = new Blob([mp3Data], { type: 'audio/mp3' });
resolve(blob);
});
chunks.push(new File([chunkBlob], `text-${i}.mp3`, { type: 'audio/mp3' }));
}

return chunks;
}


export default function MessageInput(props: MessageInputProps) {
const temperature = useAppSelector(selectTemperature);
const message = useAppSelector(selectMessage);

const [recording, setRecording] = useState(false);
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');

const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
const openAIApiKey = useAppSelector(selectOpenAIApiKey);

const context = useAppContext();
const dispatch = useAppDispatch();
const intl = useIntl();
Expand All @@ -58,6 +104,69 @@ export default function MessageInput(props: MessageInputProps) {
}
}, [context, message, dispatch]);

// Toggle voice input. First click starts recording; second click stops it
// and feeds the transcript into the message input, either via the OpenAI
// Whisper API (browser-side MP3 recording) or the native SpeechRecognition API.
const onSpeechStart = useCallback(() => {
    if (!recording) {
        setRecording(true);

        // If we are using Whisper, then we just record with the browser and
        // send the audio to the API when done.
        if (useOpenAIWhisper) {
            recorder.start().catch((e: any) => console.error(e));
        } else {
            speechRecognition.continuous = true;
            speechRecognition.interimResults = true;

            // Stream interim results straight into the message box so the
            // user sees the transcript as they speak.
            speechRecognition.onresult = (event) => {
                const transcript = event.results[event.results.length - 1][0].transcript;
                dispatch(setMessage(transcript));
            };

            speechRecognition.start();
        }
    } else {
        setRecording(false);
        if (useOpenAIWhisper) {
            const mp3 = recorder.stop().getMp3();

            mp3.then(async ([buffer, blob]) => {
                const file = new File(buffer, 'chat.mp3', {
                    type: blob.type,
                    lastModified: Date.now(),
                });

                // TODO: cut in chunks

                const data = new FormData();
                data.append('file', file);
                data.append('model', 'whisper-1');

                try {
                    const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
                        method: "POST",
                        headers: {
                            'Authorization': `Bearer ${openAIApiKey}`,
                        },
                        body: data,
                    });

                    if (!response.ok) {
                        console.error(`transcription request failed: ${response.status}`);
                        return;
                    }

                    const json = await response.json();

                    if (json.text) {
                        dispatch(setMessage(json.text));
                    }
                } catch (e) {
                    console.error(e);
                }
            }).catch((e: any) => console.error(e));
        } else {
            speechRecognition.stop();
        }
    }
    // Include every closed-over value: omitting useOpenAIWhisper/recorder/
    // openAIApiKey made the callback capture stale settings after a toggle;
    // `message` was listed but never read, so it is dropped.
}, [recording, useOpenAIWhisper, recorder, openAIApiKey, dispatch]);


const onKeyDown = useCallback((e: React.KeyboardEvent<HTMLTextAreaElement>) => {
if (e.key === 'Enter' && e.shiftKey === false && !props.disabled) {
e.preventDefault();
Expand All @@ -66,6 +175,7 @@ export default function MessageInput(props: MessageInputProps) {
}, [onSubmit, props.disabled]);

const rightSection = useMemo(() => {

return (
<div style={{
opacity: '0.8',
Expand All @@ -84,14 +194,20 @@ export default function MessageInput(props: MessageInputProps) {
<Loader size="xs" style={{ padding: '0 0.8rem 0 0.5rem' }} />
</>)}
{!context.generating && (
<ActionIcon size="xl"
onClick={onSubmit}>
<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
</ActionIcon>
<>
<ActionIcon size="xl"
onClick={onSpeechStart}>
<i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />
</ActionIcon>
<ActionIcon size="xl"
onClick={onSubmit}>
<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
</ActionIcon>
</>
)}
</div>
);
}, [onSubmit, props.disabled, context.generating]);
}, [recording, onSubmit, props.disabled, context.generating]);

const disabled = context.generating;

Expand Down
4 changes: 2 additions & 2 deletions app/src/components/message.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
<Button variant="subtle" size="sm" compact onClick={copy} style={{ marginLeft: '1rem' }}>
<i className="fa fa-clipboard" />
{copied ? <FormattedMessage defaultMessage="Copied" description="Label for copy-to-clipboard button after a successful copy" />
: <FormattedMessage defaultMessage="Copy" description="Label for copy-to-clipboard button" />}
: <FormattedMessage defaultMessage="Copy" description="Label for copy-to-clipboard button" />}
</Button>
)}
</CopyButton>
Expand All @@ -263,7 +263,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
}}>
<i className="fa fa-edit" />
<span>
{editing ? <FormattedMessage defaultMessage="Cancel" description="Label for a button that appears when the user is editing the text of one of their messages, to cancel without saving changes" />
{editing ? <FormattedMessage defaultMessage="Cancel" description="Label for a button that appears when the user is editing the text of one of their messages, to cancel without saving changes" />
: <FormattedMessage defaultMessage="Edit" description="Label for the button the user can click to edit the text of one of their messages" />}
</span>
</Button>
Expand Down
17 changes: 13 additions & 4 deletions app/src/components/settings/user.tsx
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
import SettingsTab from "./tab";
import SettingsOption from "./option";
import { TextInput } from "@mantine/core";
import { Checkbox, TextInput } from "@mantine/core";
import { useCallback, useMemo } from "react";
import { useAppDispatch, useAppSelector } from "../../store";
import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent } from "../../store/api-keys";
import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent, selectUseOpenAIWhisper, setUseOpenAIWhisperFromEvent } from "../../store/api-keys";
import { selectSettingsOption } from "../../store/settings-ui";
import { FormattedMessage, useIntl } from "react-intl";

export default function UserOptionsTab(props: any) {
const option = useAppSelector(selectSettingsOption);
const openaiApiKey = useAppSelector(selectOpenAIApiKey);
const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
const intl = useIntl()

const dispatch = useAppDispatch();
const onOpenAIApiKeyChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => dispatch(setOpenAIApiKeyFromEvent(event)), [dispatch]);
const onUseOpenAIWhisperChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => dispatch(setUseOpenAIWhisperFromEvent(event)), [dispatch]);

const elem = useMemo(() => (
<SettingsTab name="user">
<SettingsOption heading={intl.formatMessage({ defaultMessage: "Your OpenAI API Key", description: "Heading for the OpenAI API key setting on the settings screen" })}
focused={option === 'openai-api-key'}>
focused={option === 'openai-api-key'}>
<TextInput
placeholder={intl.formatMessage({ defaultMessage: "Paste your API key here" })}
value={openaiApiKey || ''}
Expand All @@ -28,6 +30,13 @@ export default function UserOptionsTab(props: any) {
<FormattedMessage defaultMessage="Find your API key here." description="Label for the link that takes the user to the page on the OpenAI website where they can find their API key." />
</a>
</p>

<Checkbox
style={{ marginTop: '1rem' }}
id="use-openai-whisper-api" checked={useOpenAIWhisper!} onChange={onUseOpenAIWhisperChange}
label="Use the OpenAI Whisper API for speech recognition."
/>

<p>
<FormattedMessage defaultMessage="Your API key is stored only on this device and never transmitted to anyone except OpenAI." />
</p>
Expand All @@ -36,7 +45,7 @@ export default function UserOptionsTab(props: any) {
</p>
</SettingsOption>
</SettingsTab>
), [option, openaiApiKey, onOpenAIApiKeyChange]);
), [option, openaiApiKey, useOpenAIWhisper, onOpenAIApiKeyChange]);

return elem;
}
2 changes: 1 addition & 1 deletion app/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async function bootstrapApplication() {

root.render(
<React.StrictMode>
<IntlProvider locale={navigator.language} messages={messages}>
<IntlProvider locale={navigator.language} defaultLocale="en-GB" messages={messages}>
<MantineProvider theme={{ colorScheme: "dark" }}>
<Provider store={store}>
<PersistGate loading={null} persistor={persistor}>
Expand Down
7 changes: 4 additions & 3 deletions app/src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ export interface OpenAIResponseChunk {

function parseResponseChunk(buffer: any): OpenAIResponseChunk {
const chunk = buffer.toString().replace('data: ', '').trim();

if (chunk === '[DONE]') {
return {
done: true,
Expand All @@ -51,7 +51,7 @@ export async function createChatCompletion(messages: OpenAIMessage[], parameters
const configuration = new Configuration({
apiKey: parameters.apiKey,
});

const openai = new OpenAIApi(configuration);

const response = await openai.createChatCompletion({
Expand Down Expand Up @@ -131,6 +131,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
});

eventSource.addEventListener('message', async (event: any) => {

if (event.data === '[DONE]') {
emitter.emit('done');
return;
Expand All @@ -149,7 +150,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p

eventSource.stream();

return {
return {
emitter,
cancel: () => eventSource.close(),
};
Expand Down
Loading

0 comments on commit 95bf0ae

Please sign in to comment.