From 62192d9f712f3cf40a291ba1e90c0d545d05ab1c Mon Sep 17 00:00:00 2001 From: Mohammed Alnasser Date: Mon, 23 Oct 2023 07:39:28 -0400 Subject: [PATCH] Speech to text | Issue #21 (#375) * Implemented speech to text. Requires checking for tests --------- Co-authored-by: Willy Douhard --- backend/chainlit/config.py | 4 + .../ask_multiple_files/.chainlit/config.toml | 75 +++++++++++++++++++ frontend/package.json | 2 + frontend/pnpm-lock.yaml | 24 ++++++ .../organisms/chat/inputBox/input.tsx | 46 +++++++++++- frontend/src/state/project.ts | 1 + 6 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 cypress/e2e/ask_multiple_files/.chainlit/config.toml diff --git a/backend/chainlit/config.py b/backend/chainlit/config.py index 993f053cbf..005635f0b4 100644 --- a/backend/chainlit/config.py +++ b/backend/chainlit/config.py @@ -50,6 +50,9 @@ # Authorize users to upload files with messages multi_modal = true +# Allows user to use speech to text +# speech_to_text = true + [UI] # Name of the app and chatbot. name = "Chatbot" @@ -145,6 +148,7 @@ class Theme(DataClassJsonMixin): class FeaturesSettings(DataClassJsonMixin): prompt_playground: bool = True multi_modal: bool = True + speech_to_text: bool = True @dataclass() diff --git a/cypress/e2e/ask_multiple_files/.chainlit/config.toml b/cypress/e2e/ask_multiple_files/.chainlit/config.toml new file mode 100644 index 0000000000..2006dfbc52 --- /dev/null +++ b/cypress/e2e/ask_multiple_files/.chainlit/config.toml @@ -0,0 +1,75 @@ +[project] +# Whether to enable telemetry (default: true). No personal data is collected. +enable_telemetry = true + +# List of environment variables to be provided by each user to use the app. 
+user_env = [] + +# Duration (in seconds) during which the session is saved when the connection is lost +session_timeout = 3600 + +# Enable third-party caching (e.g. LangChain cache) +cache = false + +# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) +# follow_symlink = false + +[features] +# Show the prompt playground +prompt_playground = true + +# Authorize users to upload files with messages +multi_modal = true + +[UI] +# Name of the app and chatbot. +name = "Chatbot" + +# Show the readme while the conversation is empty. +show_readme_as_default = true + +# Description of the app and chatbot. This is used for HTML tags. +# description = "" + +# Large content is collapsed by default for a cleaner UI +default_collapse_content = true + +# The default value for the expand messages settings. +default_expand_messages = false + +# Hide the chain of thought details from the user in the UI. +hide_cot = false + +# Link to your GitHub repo. This will add a GitHub button in the UI's header. +# github = "" + +# Specify a CSS file that can be used to customize the user interface. +# The CSS file can be served from the public directory or via an external link. +# custom_css = "/public/test.css" + +# Allows users to use speech to text. This is a [features] setting; if you uncomment it, move it under [features]. +# speech_to_text = true + +# Override default MUI light theme. (Check theme.ts) +[UI.theme.light] + #background = "#FAFAFA" + #paper = "#FFFFFF" + + [UI.theme.light.primary] + #main = "#F80061" + #dark = "#980039" + #light = "#FFE7EB" + +# Override default MUI dark theme. 
(Check theme.ts) +[UI.theme.dark] + #background = "#FAFAFA" + #paper = "#FFFFFF" + + [UI.theme.dark.primary] + #main = "#F80061" + #dark = "#980039" + #light = "#FFE7EB" + + +[meta] +generated_by = "0.7.301" diff --git a/frontend/package.json b/frontend/package.json index 84afa984ab..98b94c8595 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -27,6 +27,7 @@ "react-hotkeys-hook": "^4.4.1", "react-markdown": "^8.0.7", "react-router-dom": "^6.15.0", + "react-speech-recognition": "^3.10.0", "recoil": "^0.7.6", "remark-gfm": "^3.0.1", "socket.io-client": "^4.7.2", @@ -39,6 +40,7 @@ "@types/lodash": "^4.14.199", "@types/node": "^20.5.7", "@types/react": "^18.2.0", + "@types/react-speech-recognition": "^3.9.2", "@types/uuid": "^9.0.3", "@vitejs/plugin-react-swc": "^3.3.2", "typescript": "^5.2.2", diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index 6d20aa3ec0..058c4bdc86 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -50,6 +50,9 @@ dependencies: react-router-dom: specifier: ^6.15.0 version: 6.15.0(react-dom@18.2.0)(react@18.2.0) + react-speech-recognition: + specifier: ^3.10.0 + version: 3.10.0(react@18.2.0) recoil: specifier: ^0.7.6 version: 0.7.6(react-dom@18.2.0)(react@18.2.0) @@ -82,6 +85,9 @@ devDependencies: '@types/react': specifier: ^18.2.0 version: 18.2.0 + '@types/react-speech-recognition': + specifier: ^3.9.2 + version: 3.9.2 '@types/uuid': specifier: ^9.0.3 version: 9.0.3 @@ -870,6 +876,10 @@ packages: '@types/ms': 0.7.32 dev: false + /@types/dom-speech-recognition@0.0.2: + resolution: {integrity: sha512-GZbxBsBYBMAbpPDYg64KhAr/V8MbrlsNqrjnZJGikLdNqlQbTu+u548jg7c31ZI30/vXAX+v31t/aDr4soiBEg==} + dev: true + /@types/hast@2.3.6: resolution: {integrity: sha512-47rJE80oqPmFdVDCD7IheXBrVdwuBgsYwoczFvKmwfo2Mzsnt+V9OONsYauFmICb6lQPpCuXYJWejBNs4pDJRg==} dependencies: @@ -901,6 +911,12 @@ packages: /@types/prop-types@15.7.8: resolution: {integrity: 
sha512-kMpQpfZKSCBqltAJwskgePRaYRFukDkm1oItcAbC3gNELR20XIBcN9VRgg4+m8DKsTfkWeA4m4Imp4DDuWy7FQ==} + /@types/react-speech-recognition@3.9.2: + resolution: {integrity: sha512-LS13Z4A8nluGWyT1NQncWoyaWARJdEojxmcRvaFDT9nTHpRkMgPjaYBJIc/9GBRYYFy8TQGaiCmUdH2g4M9INg==} + dependencies: + '@types/dom-speech-recognition': 0.0.2 + dev: true + /@types/react-transition-group@4.4.7: resolution: {integrity: sha512-ICCyBl5mvyqYp8Qeq9B5G/fyBSRC0zx3XM3sCC6KkcMsNeAHqXBKkmat4GqdJET5jtYUpZXrxI5flve5qhi2Eg==} dependencies: @@ -1830,6 +1846,14 @@ packages: react: 18.2.0 dev: false + /react-speech-recognition@3.10.0(react@18.2.0): + resolution: {integrity: sha512-EVSr4Ik8l9urwdPiK2r0+ADrLyDDrjB0qBRdUWO+w2MfwEBrj6NuRmy1GD3x7BU/V6/hab0pl8Lupen0zwlJyw==} + peerDependencies: + react: '>=16.8.0' + dependencies: + react: 18.2.0 + dev: false + /react-transition-group@4.4.5(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==} peerDependencies: diff --git a/frontend/src/components/organisms/chat/inputBox/input.tsx b/frontend/src/components/organisms/chat/inputBox/input.tsx index acd7846368..cf3935b0a6 100644 --- a/frontend/src/components/organisms/chat/inputBox/input.tsx +++ b/frontend/src/components/organisms/chat/inputBox/input.tsx @@ -1,6 +1,11 @@ import { useCallback, useEffect, useRef, useState } from 'react'; +import SpeechRecognition, { + useSpeechRecognition +} from 'react-speech-recognition'; import { useRecoilState, useSetRecoilState } from 'recoil'; +import KeyboardVoiceIcon from '@mui/icons-material/KeyboardVoice'; +import StopCircleIcon from '@mui/icons-material/StopCircle'; import SendIcon from '@mui/icons-material/Telegram'; import TuneIcon from '@mui/icons-material/Tune'; import { Box, IconButton, Stack, TextField } from '@mui/material'; @@ -18,7 +23,7 @@ import HistoryButton from 'components/organisms/chat/history'; import { attachmentsState } from 'state/chat'; import { 
chatHistoryState } from 'state/chatHistory'; -import { chatSettingsOpenState } from 'state/project'; +import { chatSettingsOpenState, projectSettingsState } from 'state/project'; import UploadButton from './UploadButton'; @@ -55,6 +60,16 @@ const Input = ({ const [value, setValue] = useState(''); const [isComposing, setIsComposing] = useState(false); + const [isRecording, setIsRecording] = useState(false); + const { transcript, browserSupportsSpeechRecognition } = + useSpeechRecognition(); + + const [pSettings] = useRecoilState(projectSettingsState); + const showTextToSpeech = + (pSettings?.features.speech_to_text === undefined + ? true + : pSettings?.features.speech_to_text) && browserSupportsSpeechRecognition; + const [lastTranscript, setLastTranscript] = useState(''); useEffect(() => { const pasteEvent = (event: ClipboardEvent) => { @@ -104,6 +119,13 @@ const Input = ({ } }, [loading, disabled]); + useEffect(() => { + if (lastTranscript.length < transcript.length) { + setValue((text) => text + transcript.slice(lastTranscript.length)); + } + setLastTranscript(transcript); + }, [transcript]); + const submit = useCallback(() => { if (value === '' || disabled) { return; @@ -169,6 +191,28 @@ const Input = ({ )} + {showTextToSpeech && + (isRecording ? ( + { + setIsRecording(false); + SpeechRecognition.stopListening(); + }} + > + + + ) : ( + { + setIsRecording(true); + SpeechRecognition.startListening({ + continuous: true + }); + }} + > + + + ))}