Speech to text | Issue #21 (#375)

* Implemented speech to text. Requires checking for tests.

---------

Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
Chainlit · Oct 23, 2023 · 62192d9 · 62192d9
1 parent 60b3d1f
commit 62192d9
Show file tree

Hide file tree

Showing 6 changed files with 151 additions and 1 deletion.
diff --git a/backend/chainlit/config.py b/backend/chainlit/config.py
@@ -50,6 +50,9 @@
 # Authorize users to upload files with messages
 multi_modal = true
 
+# Allow users to use speech to text
+# speech_to_text = true
+
 [UI]
 # Name of the app and chatbot.
 name = "Chatbot"
@@ -145,6 +148,7 @@ class Theme(DataClassJsonMixin):
 class FeaturesSettings(DataClassJsonMixin):
     prompt_playground: bool = True
     multi_modal: bool = True
+    speech_to_text: bool = True
 
 
 @dataclass()

diff --git a/cypress/e2e/ask_multiple_files/.chainlit/config.toml b/cypress/e2e/ask_multiple_files/.chainlit/config.toml
@@ -0,0 +1,75 @@
+[project]
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = true
+
+# List of environment variables to be provided by each user to use the app.
+user_env = []
+
+# Duration (in seconds) during which the session is saved when the connection is lost
+session_timeout = 3600
+
+# Enable third parties caching (e.g LangChain cache)
+cache = false
+
+# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
+# follow_symlink = false
+
+[features]
+# Show the prompt playground
+prompt_playground = true
+
+# Authorize users to upload files with messages
+multi_modal = true
+
+# Allow users to use speech to text (this is a [features] option, not a [UI] one)
+# speech_to_text = true
+
+[UI]
+# Name of the app and chatbot.
+name = "Chatbot"
+
+# Show the readme while the conversation is empty.
+show_readme_as_default = true
+
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+
+# Large size content are by default collapsed for a cleaner ui
+default_collapse_content = true
+
+# The default value for the expand messages settings.
+default_expand_messages = false
+
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+
+# Specify a CSS file that can be used to customize the user interface.
+# The CSS file can be served from the public directory or via an external link.
+# custom_css = "/public/test.css"
+
+# Override default MUI light theme. (Check theme.ts)
+[UI.theme.light]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+
+    [UI.theme.light.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+
+# Override default MUI dark theme. (Check theme.ts)
+[UI.theme.dark]
+    #background = "#FAFAFA"
+    #paper = "#FFFFFF"
+
+    [UI.theme.dark.primary]
+        #main = "#F80061"
+        #dark = "#980039"
+        #light = "#FFE7EB"
+
+
+[meta]
+generated_by = "0.7.301"
diff --git a/frontend/package.json b/frontend/package.json
@@ -27,6 +27,7 @@
     "react-hotkeys-hook": "^4.4.1",
     "react-markdown": "^8.0.7",
     "react-router-dom": "^6.15.0",
+    "react-speech-recognition": "^3.10.0",
     "recoil": "^0.7.6",
     "remark-gfm": "^3.0.1",
     "socket.io-client": "^4.7.2",
@@ -39,6 +40,7 @@
     "@types/lodash": "^4.14.199",
     "@types/node": "^20.5.7",
     "@types/react": "^18.2.0",
+    "@types/react-speech-recognition": "^3.9.2",
     "@types/uuid": "^9.0.3",
     "@vitejs/plugin-react-swc": "^3.3.2",
     "typescript": "^5.2.2",

diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml
diff --git a/frontend/src/components/organisms/chat/inputBox/input.tsx b/frontend/src/components/organisms/chat/inputBox/input.tsx
@@ -1,6 +1,11 @@
 import { useCallback, useEffect, useRef, useState } from 'react';
+import SpeechRecognition, {
+  useSpeechRecognition
+} from 'react-speech-recognition';
 import { useRecoilState, useSetRecoilState } from 'recoil';
 
+import KeyboardVoiceIcon from '@mui/icons-material/KeyboardVoice';
+import StopCircleIcon from '@mui/icons-material/StopCircle';
 import SendIcon from '@mui/icons-material/Telegram';
 import TuneIcon from '@mui/icons-material/Tune';
 import { Box, IconButton, Stack, TextField } from '@mui/material';
@@ -18,7 +23,7 @@ import HistoryButton from 'components/organisms/chat/history';
 
 import { attachmentsState } from 'state/chat';
 import { chatHistoryState } from 'state/chatHistory';
-import { chatSettingsOpenState } from 'state/project';
+import { chatSettingsOpenState, projectSettingsState } from 'state/project';
 
 import UploadButton from './UploadButton';
 
@@ -55,6 +60,16 @@ const Input = ({
 
   const [value, setValue] = useState('');
   const [isComposing, setIsComposing] = useState(false);
+  const [isRecording, setIsRecording] = useState(false);
+  const { transcript, browserSupportsSpeechRecognition } =
+    useSpeechRecognition();
+
+  const [pSettings] = useRecoilState(projectSettingsState);
+  const showTextToSpeech =
+    (pSettings?.features.speech_to_text === undefined
+      ? true
+      : pSettings?.features.speech_to_text) && browserSupportsSpeechRecognition;
+  const [lastTranscript, setLastTranscript] = useState('');
 
   useEffect(() => {
     const pasteEvent = (event: ClipboardEvent) => {
@@ -104,6 +119,13 @@ const Input = ({
     }
   }, [loading, disabled]);
 
+  useEffect(() => {
+    if (lastTranscript.length < transcript.length) {
+      setValue((text) => text + transcript.slice(lastTranscript.length));
+    }
+    setLastTranscript(transcript);
+  }, [transcript]);
+
   const submit = useCallback(() => {
     if (value === '' || disabled) {
       return;
@@ -169,6 +191,28 @@ const Input = ({
           <TuneIcon />
         </IconButton>
       )}
+      {showTextToSpeech &&
+        (isRecording ? (
+          <IconButton
+            onClick={() => {
+              setIsRecording(false);
+              SpeechRecognition.stopListening();
+            }}
+          >
+            <StopCircleIcon />
+          </IconButton>
+        ) : (
+          <IconButton
+            onClick={() => {
+              setIsRecording(true);
+              SpeechRecognition.startListening({
+                continuous: true
+              });
+            }}
+          >
+            <KeyboardVoiceIcon />
+          </IconButton>
+        ))}
       <UploadButton
         disabled={disabled}
         fileSpec={fileSpec}

diff --git a/frontend/src/state/project.ts b/frontend/src/state/project.ts
@@ -21,6 +21,7 @@ export interface IProjectSettings {
   };
   features: {
     multi_modal?: boolean;
+    speech_to_text?: boolean;
   };
   userEnv: string[];
   dataPersistence: boolean;