diff --git a/app/package.json b/app/package.json
index 2b690c25..2b4850fb 100644
--- a/app/package.json
+++ b/app/package.json
@@ -4,6 +4,7 @@
"dependencies": {
"@auth0/auth0-spa-js": "^2.0.4",
"@emotion/css": "^11.10.6",
+ "@emotion/react": "^11.10.6",
"@emotion/styled": "^11.10.6",
"@mantine/core": "^5.10.5",
"@mantine/hooks": "^5.10.5",
@@ -20,6 +21,7 @@
"jshashes": "^1.0.8",
"localforage": "^1.10.0",
"match-sorter": "^6.3.1",
+ "mic-recorder-to-mp3": "^2.2.2",
"minisearch": "^6.0.1",
"natural": "^6.2.0",
"openai": "^3.2.1",
diff --git a/app/src/components/input.tsx b/app/src/components/input.tsx
index 18da4a10..2e440b6d 100644
--- a/app/src/components/input.tsx
+++ b/app/src/components/input.tsx
@@ -1,7 +1,7 @@
import styled from '@emotion/styled';
import { Button, ActionIcon, Textarea, Loader } from '@mantine/core';
import { useMediaQuery } from '@mantine/hooks';
-import { useCallback, useMemo } from 'react';
+import { useCallback, useMemo, useState } from 'react';
import { FormattedMessage, useIntl } from 'react-intl';
import { useLocation } from 'react-router-dom';
import { useAppContext } from '../context';
@@ -9,6 +9,10 @@ import { useAppDispatch, useAppSelector } from '../store';
import { selectMessage, setMessage } from '../store/message';
import { selectTemperature } from '../store/parameters';
import { openSystemPromptPanel, openTemperaturePanel } from '../store/settings-ui';
+import { speechRecognition } from '../speech-recognition-types.d';
+import MicRecorder from 'mic-recorder-to-mp3';
+import { selectUseOpenAIWhisper, selectOpenAIApiKey } from '../store/api-keys';
+import { Mp3Encoder } from 'lamejs';
const Container = styled.div`
background: #292933;
@@ -34,12 +38,54 @@ export interface MessageInputProps {
disabled?: boolean;
}
+
+
+async function chunkAndEncodeMP3File(file: Blob): Promise<Array<File>> {
+ const MAX_CHUNK_SIZE = 25 * 1024 * 1024; // 25 MB
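+ // the OpenAI audio transcription endpoint rejects uploads larger than 25 MB, hence the chunking below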
+ const audioContext = new AudioContext();
+ const audioBuffer = await audioContext.decodeAudioData(await file.arrayBuffer());
+ const duration = audioBuffer.duration;
+ const sampleRate = audioBuffer.sampleRate;
+ const numChannels = audioBuffer.numberOfChannels;
+ const bytesPerSample = 2; // 16-bit audio
+ const samplesPerChunk = Math.floor((MAX_CHUNK_SIZE / bytesPerSample) / numChannels);
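+ // chunks are sized so each one's raw 16-bit PCM stays under the cap; the encoded MP3 output will be smaller still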
+ const totalSamples = Math.floor(duration * sampleRate);
+ const numChunks = Math.ceil(totalSamples / samplesPerChunk);
+
+ const chunks: Array<File> = [];
+ for (let i = 0; i < numChunks; i++) {
+ const startSample = i * samplesPerChunk;
+ const endSample = Math.min(startSample + samplesPerChunk, totalSamples);
+ const chunkDuration = (endSample - startSample) / sampleRate;
+ const chunkBuffer = audioContext.createBuffer(numChannels, endSample - startSample, sampleRate);
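+ // copy this chunk's span of samples into a fresh AudioBuffer, channel by channel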
+ for (let c = 0; c < numChannels; c++) {
+ const channelData = audioBuffer.getChannelData(c).subarray(startSample, endSample);
+ chunkBuffer.copyToChannel(channelData, c);
+ }
+ const chunkBlob: Blob = await new Promise((resolve) => {
+ const encoder = new Mp3Encoder(numChannels, sampleRate, 128);
+ // lamejs expects 16-bit PCM samples, so clamp and convert the Float32 channel data before encoding
+ const toInt16 = (f: Float32Array) => Int16Array.from(f, (x) => Math.max(-1, Math.min(1, x)) * 0x7FFF);
+ const leftData = toInt16(chunkBuffer.getChannelData(0));
+ const rightData = numChannels === 1 ? leftData : toInt16(chunkBuffer.getChannelData(1));
+ const mp3Data = encoder.encodeBuffer(leftData, rightData);
+ // flush() returns the encoder's buffered tail; without it the MP3 is truncated
+ const blob = new Blob([mp3Data, encoder.flush()], { type: 'audio/mp3' });
+ resolve(blob);
+ });
+ chunks.push(new File([chunkBlob], `text-${i}.mp3`, { type: 'audio/mp3' }));
+ }
+
+ return chunks;
+}
+
+
export default function MessageInput(props: MessageInputProps) {
const temperature = useAppSelector(selectTemperature);
const message = useAppSelector(selectMessage);
-
+ const [recording, setRecording] = useState(false);
const hasVerticalSpace = useMediaQuery('(min-height: 1000px)');
-
+ const recorder = useMemo(() => new MicRecorder({ bitRate: 128 }), []);
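+ // memoize the recorder so a single instance lives for the component's lifetime; 128 kbps matches the encoder setting above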
+ const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
+ const openAIApiKey = useAppSelector(selectOpenAIApiKey);
+
const context = useAppContext();
const dispatch = useAppDispatch();
const intl = useIntl();
@@ -58,6 +104,69 @@ export default function MessageInput(props: MessageInputProps) {
}
}, [context, message, dispatch]);
+ const onSpeechStart = useCallback(() => {
+
+ if (!recording) {
+ setRecording(true);
+
+ // if we are using Whisper, then we just record with the browser and send the audio to the API when done
+ if (useOpenAIWhisper) {
+ recorder.start().catch((e: any) => console.error(e));
+ } else {
+ speechRecognition.continuous = true;
+ speechRecognition.interimResults = true;
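+ // keep listening across pauses and stream interim transcripts into the input as the user speaks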
+
+ speechRecognition.onresult = (event) => {
+ const transcript = event.results[event.results.length - 1][0].transcript;
+ dispatch(setMessage(transcript));
+ };
+
+ speechRecognition.start();
+ }
+ } else {
+ setRecording(false);
+ if (useOpenAIWhisper) {
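+ // mic-recorder-to-mp3's getMp3() resolves with a [buffer, blob] pair: the raw MP3 frames plus a Blob wrapper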
+ const mp3 = recorder.stop().getMp3();
+
+ mp3.then(async ([buffer, blob]) => {
+
+ const file = new File(buffer, 'chat.mp3', {
+ type: blob.type,
+ lastModified: Date.now()
+ });
+
+ // TODO: cut in chunks
+
+ const data = new FormData();
+ data.append('file', file);
+ data.append('model', 'whisper-1');
+
+ try {
+ const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
+ method: "POST",
+ headers: {
+ 'Authorization': `Bearer ${openAIApiKey}`,
+ },
+ body: data,
+ });
+
+ const json = await response.json();
+
+ if (json.text) {
+ dispatch(setMessage(json.text));
+ }
+ } catch (e) {
+ console.error(e);
+ }
+
+ }).catch((e: any) => console.error(e));
+ } else {
+ speechRecognition.stop();
+ }
+ }
+ }, [recording, useOpenAIWhisper, recorder, openAIApiKey, dispatch]);
+
+
const onKeyDown = useCallback((e: React.KeyboardEvent) => {
if (e.key === 'Enter' && e.shiftKey === false && !props.disabled) {
e.preventDefault();
@@ -66,6 +175,7 @@ export default function MessageInput(props: MessageInputProps) {
}, [onSubmit, props.disabled]);
const rightSection = useMemo(() => {
+
return (
</>)}
{!context.generating && (
-<ActionIcon size="xl" onClick={onSubmit}>
-<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
-</ActionIcon>
+<>
+<ActionIcon size="xl" onClick={onSpeechStart}>
+<i className="fa fa-microphone" style={{ fontSize: '90%', color: recording ? 'red' : 'inherit' }} />
+</ActionIcon>
+<ActionIcon size="xl" onClick={onSubmit}>
+<i className="fa fa-paper-plane" style={{ fontSize: '90%' }} />
+</ActionIcon>
+</>
)}
);
- }, [onSubmit, props.disabled, context.generating]);
+ }, [recording, onSubmit, props.disabled, context.generating]);
const disabled = context.generating;
diff --git a/app/src/components/message.tsx b/app/src/components/message.tsx
index de77ae1a..de14c6f0 100644
--- a/app/src/components/message.tsx
+++ b/app/src/components/message.tsx
@@ -244,7 +244,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
)}
@@ -263,7 +263,7 @@ export default function MessageComponent(props: { message: Message, last: boolea
}}>
- {editing ?
+ {editing ?
: }
diff --git a/app/src/components/settings/user.tsx b/app/src/components/settings/user.tsx
index 9e217fa2..6b6d1303 100644
--- a/app/src/components/settings/user.tsx
+++ b/app/src/components/settings/user.tsx
@@ -1,24 +1,26 @@
import SettingsTab from "./tab";
import SettingsOption from "./option";
-import { TextInput } from "@mantine/core";
+import { Checkbox, TextInput } from "@mantine/core";
import { useCallback, useMemo } from "react";
import { useAppDispatch, useAppSelector } from "../../store";
-import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent } from "../../store/api-keys";
+import { selectOpenAIApiKey, setOpenAIApiKeyFromEvent, selectUseOpenAIWhisper, setUseOpenAIWhisperFromEvent } from "../../store/api-keys";
import { selectSettingsOption } from "../../store/settings-ui";
import { FormattedMessage, useIntl } from "react-intl";
export default function UserOptionsTab(props: any) {
const option = useAppSelector(selectSettingsOption);
const openaiApiKey = useAppSelector(selectOpenAIApiKey);
+ const useOpenAIWhisper = useAppSelector(selectUseOpenAIWhisper);
const intl = useIntl()
const dispatch = useAppDispatch();
const onOpenAIApiKeyChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => dispatch(setOpenAIApiKeyFromEvent(event)), [dispatch]);
+ const onUseOpenAIWhisperChange = useCallback((event: React.ChangeEvent<HTMLInputElement>) => dispatch(setUseOpenAIWhisperFromEvent(event)), [dispatch]);
const elem = useMemo(() => (
+ focused={option === 'openai-api-key'}>
+ <TextInput placeholder={intl.formatMessage({ defaultMessage: "Paste your API key here" })} value={openaiApiKey || ''} onChange={onOpenAIApiKeyChange} />
+ <Checkbox label={intl.formatMessage({ defaultMessage: "Use the OpenAI Whisper API for speech recognition" })} checked={useOpenAIWhisper} onChange={onUseOpenAIWhisperChange} style={{ marginTop: '1rem' }} />
@@ -36,7 +45,7 @@ export default function UserOptionsTab(props: any) {
- ), [option, openaiApiKey, onOpenAIApiKeyChange]);
+ ), [option, openaiApiKey, useOpenAIWhisper, onOpenAIApiKeyChange, onUseOpenAIWhisperChange]);
return elem;
}
\ No newline at end of file
diff --git a/app/src/index.tsx b/app/src/index.tsx
index 877a99f9..209c75b6 100644
--- a/app/src/index.tsx
+++ b/app/src/index.tsx
@@ -72,7 +72,7 @@ async function bootstrapApplication() {
root.render(
-
+
diff --git a/app/src/openai.ts b/app/src/openai.ts
index 07e6afcd..0e9eb9c7 100644
--- a/app/src/openai.ts
+++ b/app/src/openai.ts
@@ -26,7 +26,7 @@ export interface OpenAIResponseChunk {
function parseResponseChunk(buffer: any): OpenAIResponseChunk {
const chunk = buffer.toString().replace('data: ', '').trim();
-
+
if (chunk === '[DONE]') {
return {
done: true,
@@ -51,7 +51,7 @@ export async function createChatCompletion(messages: OpenAIMessage[], parameters
const configuration = new Configuration({
apiKey: parameters.apiKey,
});
-
+
const openai = new OpenAIApi(configuration);
const response = await openai.createChatCompletion({
@@ -131,6 +131,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
});
eventSource.addEventListener('message', async (event: any) => {
+
if (event.data === '[DONE]') {
emitter.emit('done');
return;
@@ -149,7 +150,7 @@ export async function createStreamingChatCompletion(messages: OpenAIMessage[], p
eventSource.stream();
- return {
+ return {
emitter,
cancel: () => eventSource.close(),
};
diff --git a/app/src/speech-recognition-types.d.ts b/app/src/speech-recognition-types.d.ts
new file mode 100644
index 00000000..8e205a5b
--- /dev/null
+++ b/app/src/speech-recognition-types.d.ts
@@ -0,0 +1,133 @@
+declare global {
+ interface Window {
+ SpeechRecognition: typeof SpeechRecognition
+ webkitSpeechRecognition: typeof SpeechRecognition
+ }
+ interface SpeechGrammar {
+ src: string
+ weight: number
+ }
+
+ const SpeechGrammar: {
+ prototype: SpeechGrammar
+ new(): SpeechGrammar
+ }
+
+ interface SpeechGrammarList {
+ readonly length: number
+ addFromString(string: string, weight?: number): void
+ addFromURI(src: string, weight?: number): void
+ item(index: number): SpeechGrammar
+ [index: number]: SpeechGrammar
+ }
+
+ const SpeechGrammarList: {
+ prototype: SpeechGrammarList
+ new(): SpeechGrammarList
+ }
+
+ interface SpeechRecognitionEventMap {
+ audioend: Event
+ audiostart: Event
+ end: Event
+ error: SpeechRecognitionError
+ nomatch: SpeechRecognitionEvent
+ result: SpeechRecognitionEvent
+ soundend: Event
+ soundstart: Event
+ speechend: Event
+ speechstart: Event
+ start: Event
+ }
+
+ interface SpeechRecognition {
+ continuous: boolean
+ grammars: SpeechGrammarList
+ interimResults: boolean
+ lang: string
+ maxAlternatives: number
+ onaudioend: ((this: SpeechRecognition, ev: Event) => any) | null
+ onaudiostart: ((this: SpeechRecognition, ev: Event) => any) | null
+ onend: ((this: SpeechRecognition, ev: Event) => any) | null
+ onerror:
+ | ((this: SpeechRecognition, ev: SpeechRecognitionError) => any)
+ | null
+ onnomatch:
+ | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any)
+ | null
+ onresult:
+ | ((this: SpeechRecognition, ev: SpeechRecognitionEvent) => any)
+ | null
+ onsoundend: ((this: SpeechRecognition, ev: Event) => any) | null
+ onsoundstart: ((this: SpeechRecognition, ev: Event) => any) | null
+ onspeechend: ((this: SpeechRecognition, ev: Event) => any) | null
+ onspeechstart: ((this: SpeechRecognition, ev: Event) => any) | null
+ onstart: ((this: SpeechRecognition, ev: Event) => any) | null
+ serviceURI: string
+ abort(): void
+ start(): void
+ stop(): void
+ addEventListener<K extends keyof SpeechRecognitionEventMap>(
+ type: K,
+ listener: (
+ this: SpeechRecognition,
+ ev: SpeechRecognitionEventMap[K]
+ ) => any,
+ options?: boolean | AddEventListenerOptions
+ ): void
+ addEventListener(
+ type: string,
+ listener: EventListenerOrEventListenerObject,
+ options?: boolean | AddEventListenerOptions
+ ): void
+ removeEventListener<K extends keyof SpeechRecognitionEventMap>(
+ type: K,
+ listener: (
+ this: SpeechRecognition,
+ ev: SpeechRecognitionEventMap[K]
+ ) => any,
+ options?: boolean | EventListenerOptions
+ ): void
+ removeEventListener(
+ type: string,
+ listener: EventListenerOrEventListenerObject,
+ options?: boolean | EventListenerOptions
+ ): void
+ }
+
+ const SpeechRecognition: {
+ prototype: SpeechRecognition
+ new(): SpeechRecognition
+ }
+
+ interface SpeechRecognitionError extends Event {
+ // readonly error: SpeechRecognitionErrorCode;
+ readonly message: string
+ }
+
+ const SpeechRecognitionError: {
+ prototype: SpeechRecognitionError
+ new(): SpeechRecognitionError
+ }
+
+ interface SpeechRecognitionEvent extends Event {
+ readonly emma: Document | null
+ readonly interpretation: any
+ readonly resultIndex: number
+ readonly results: SpeechRecognitionResultList
+ }
+
+ const SpeechRecognitionEvent: {
+ prototype: SpeechRecognitionEvent
+ new(): SpeechRecognitionEvent
+ }
+}
+
+let speechRecognition: SpeechRecognition
+
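+// prefer the standard constructor, falling back to the webkit-prefixed one exposed by Chrome and Safari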
+if (window.SpeechRecognition) {
+ speechRecognition = new window.SpeechRecognition()
+} else {
+ speechRecognition = new window.webkitSpeechRecognition()
+}
+
+export { speechRecognition }
\ No newline at end of file
diff --git a/app/src/store/api-keys.ts b/app/src/store/api-keys.ts
index 59a86344..eced56c4 100644
--- a/app/src/store/api-keys.ts
+++ b/app/src/store/api-keys.ts
@@ -3,9 +3,12 @@ import type { RootState } from '.';
const initialState: {
openAIApiKey?: string | null | undefined;
+ useOpenAIWhisper: boolean;
elevenLabsApiKey?: string | null | undefined;
+
} = {
openAIApiKey: localStorage.getItem('openai-api-key'),
+ useOpenAIWhisper: false,
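+ // defaults to off and is not read back from localStorage, so the browser engine is used unless the user opts in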
elevenLabsApiKey: localStorage.getItem('elevenlabs-api-key'),
};
@@ -18,7 +21,11 @@ export const apiKeysSlice = createSlice({
},
setElevenLabsApiKey: (state, action: PayloadAction<string>) => {
state.elevenLabsApiKey = action.payload;
+ },
+ setUseOpenAIWhisper: (state, action: PayloadAction<boolean>) => {
+ state.useOpenAIWhisper = action.payload;
}
+
},
})
@@ -26,8 +33,10 @@ export const { setOpenAIApiKey, setElevenLabsApiKey } = apiKeysSlice.actions;
export const setOpenAIApiKeyFromEvent = (event: React.ChangeEvent<HTMLInputElement>) => apiKeysSlice.actions.setOpenAIApiKey(event.target.value);
export const setElevenLabsApiKeyFromEvent = (event: React.ChangeEvent<HTMLInputElement>) => apiKeysSlice.actions.setElevenLabsApiKey(event.target.value);
+export const setUseOpenAIWhisperFromEvent = (event: React.ChangeEvent<HTMLInputElement>) => apiKeysSlice.actions.setUseOpenAIWhisper(event.target.checked);
export const selectOpenAIApiKey = (state: RootState) => state.apiKeys.openAIApiKey;
export const selectElevenLabsApiKey = (state: RootState) => state.apiKeys.elevenLabsApiKey;
+export const selectUseOpenAIWhisper = (state: RootState) => state.apiKeys.useOpenAIWhisper;
export default apiKeysSlice.reducer;
\ No newline at end of file
diff --git a/app/src/store/index.ts b/app/src/store/index.ts
index fa5be686..be1fdde8 100644
--- a/app/src/store/index.ts
+++ b/app/src/store/index.ts
@@ -25,6 +25,8 @@ const persistMessageConfig = {
storage,
}
+
+
const store = configureStore({
reducer: {
// auth: authReducer,
diff --git a/server/src/endpoints/whisper.ts b/server/src/endpoints/whisper.ts
new file mode 100644
index 00000000..94a81671
--- /dev/null
+++ b/server/src/endpoints/whisper.ts
@@ -0,0 +1,8 @@
+import express from 'express';
+import RequestHandler from "./base";
+
+export default class WhisperRequestHandler extends RequestHandler {
+ handler(req: express.Request, res: express.Response): any {
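+ // placeholder: the web client currently posts audio to the OpenAI transcription API directly, so this endpoint just acknowledges the request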
+ res.json({ status: 'ok' });
+ }
+}
\ No newline at end of file
diff --git a/server/src/index.ts b/server/src/index.ts
index d8328499..1720daf8 100644
--- a/server/src/index.ts
+++ b/server/src/index.ts
@@ -18,6 +18,7 @@ import BasicCompletionRequestHandler from './endpoints/completion/basic';
import StreamingCompletionRequestHandler from './endpoints/completion/streaming';
import SessionRequestHandler from './endpoints/session';
import GetShareRequestHandler from './endpoints/get-share';
+import WhisperRequestHandler from './endpoints/whisper';
import { configurePassport } from './passport';
import { configureAuth0 } from './auth0';
import DeleteChatRequestHandler from './endpoints/delete-chat';
@@ -82,6 +83,7 @@ export default class ChatServer {
this.app.post('/chatapi/sync', (req, res) => new SyncRequestHandler(this, req, res));
this.app.get('/chatapi/share/:id', (req, res) => new GetShareRequestHandler(this, req, res));
this.app.post('/chatapi/share', (req, res) => new ShareRequestHandler(this, req, res));
+ this.app.post('/chatapi/whisper', (req, res) => new WhisperRequestHandler(this, req, res));
if (process.env.ENABLE_SERVER_COMPLETION) {
this.app.post('/chatapi/completion', (req, res) => new BasicCompletionRequestHandler(this, req, res));