Skip to content

Commit

Permalink
Speech to text | Issue #21 (#375)
Browse files Browse the repository at this point in the history
* Implemented speech to text.
Requires checking for tests

---------

Co-authored-by: Willy Douhard <willy.douhard@gmail.com>
  • Loading branch information
mmnasser2000 and willydouhard authored Oct 23, 2023
1 parent 60b3d1f commit 62192d9
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 1 deletion.
4 changes: 4 additions & 0 deletions backend/chainlit/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
# Authorize users to upload files with messages
multi_modal = true
# Allows user to use speech to text
# speech_to_text = true
[UI]
# Name of the app and chatbot.
name = "Chatbot"
Expand Down Expand Up @@ -145,6 +148,7 @@ class Theme(DataClassJsonMixin):
class FeaturesSettings(DataClassJsonMixin):
prompt_playground: bool = True
multi_modal: bool = True
speech_to_text: bool = True


@dataclass()
Expand Down
75 changes: 75 additions & 0 deletions cypress/e2e/ask_multiple_files/.chainlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
[project]
# Whether to enable telemetry (default: true). No personal data is collected.
enable_telemetry = true

# List of environment variables to be provided by each user to use the app.
user_env = []

# Duration (in seconds) during which the session is saved when the connection is lost
session_timeout = 3600

# Enable third-party caching (e.g. LangChain cache)
cache = false

# Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317)
# follow_symlink = false

[features]
# Show the prompt playground
prompt_playground = true

# Authorize users to upload files with messages
multi_modal = true

[UI]
# Name of the app and chatbot.
name = "Chatbot"

# Show the readme while the conversation is empty.
show_readme_as_default = true

# Description of the app and chatbot. This is used for HTML tags.
# description = ""

# Large content is collapsed by default for a cleaner UI
default_collapse_content = true

# The default value for the expand messages settings.
default_expand_messages = false

# Hide the chain of thought details from the user in the UI.
hide_cot = false

# Link to your github repo. This will add a github button in the UI's header.
# github = ""

# Specify a CSS file that can be used to customize the user interface.
# The CSS file can be served from the public directory or via an external link.
# custom_css = "/public/test.css"

# Allow users to dictate messages using speech to text
# speech_to_text = true

# Override default MUI light theme. (Check theme.ts)
[UI.theme.light]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.light.primary]
#main = "#F80061"
#dark = "#980039"
#light = "#FFE7EB"

# Override default MUI dark theme. (Check theme.ts)
[UI.theme.dark]
#background = "#FAFAFA"
#paper = "#FFFFFF"

[UI.theme.dark.primary]
#main = "#F80061"
#dark = "#980039"
#light = "#FFE7EB"


[meta]
generated_by = "0.7.301"
2 changes: 2 additions & 0 deletions frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"react-hotkeys-hook": "^4.4.1",
"react-markdown": "^8.0.7",
"react-router-dom": "^6.15.0",
"react-speech-recognition": "^3.10.0",
"recoil": "^0.7.6",
"remark-gfm": "^3.0.1",
"socket.io-client": "^4.7.2",
Expand All @@ -39,6 +40,7 @@
"@types/lodash": "^4.14.199",
"@types/node": "^20.5.7",
"@types/react": "^18.2.0",
"@types/react-speech-recognition": "^3.9.2",
"@types/uuid": "^9.0.3",
"@vitejs/plugin-react-swc": "^3.3.2",
"typescript": "^5.2.2",
Expand Down
24 changes: 24 additions & 0 deletions frontend/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 45 additions & 1 deletion frontend/src/components/organisms/chat/inputBox/input.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import SpeechRecognition, {
useSpeechRecognition
} from 'react-speech-recognition';
import { useRecoilState, useSetRecoilState } from 'recoil';

import KeyboardVoiceIcon from '@mui/icons-material/KeyboardVoice';
import StopCircleIcon from '@mui/icons-material/StopCircle';
import SendIcon from '@mui/icons-material/Telegram';
import TuneIcon from '@mui/icons-material/Tune';
import { Box, IconButton, Stack, TextField } from '@mui/material';
Expand All @@ -18,7 +23,7 @@ import HistoryButton from 'components/organisms/chat/history';

import { attachmentsState } from 'state/chat';
import { chatHistoryState } from 'state/chatHistory';
import { chatSettingsOpenState } from 'state/project';
import { chatSettingsOpenState, projectSettingsState } from 'state/project';

import UploadButton from './UploadButton';

Expand Down Expand Up @@ -55,6 +60,16 @@ const Input = ({

const [value, setValue] = useState('');
const [isComposing, setIsComposing] = useState(false);
// Whether the voice button is in its "recording" state; toggled by the
// start/stop voice buttons rendered further down in this component.
const [isRecording, setIsRecording] = useState(false);
// Live transcript and browser capability flag from react-speech-recognition.
const { transcript, browserSupportsSpeechRecognition } =
useSpeechRecognition();

const [pSettings] = useRecoilState(projectSettingsState);
// The voice button is shown when the speech_to_text feature flag is either
// unset (treated as enabled) or truthy, AND the browser supports speech
// recognition.
// NOTE(review): despite its name, this flag gates speech-to-text (dictation),
// not text-to-speech — consider renaming to showSpeechToText.
const showTextToSpeech =
(pSettings?.features.speech_to_text === undefined
? true
: pSettings?.features.speech_to_text) && browserSupportsSpeechRecognition;
// Transcript value seen on the previous transcript update; used to work out
// which part of the transcript is new.
const [lastTranscript, setLastTranscript] = useState('');

useEffect(() => {
const pasteEvent = (event: ClipboardEvent) => {
Expand Down Expand Up @@ -104,6 +119,13 @@ const Input = ({
}
}, [loading, disabled]);

// Append newly recognised speech to the input value. Runs whenever
// `transcript` changes.
useEffect(() => {
// Only the portion of the transcript beyond what was already seen is
// appended, so text the user typed or edited in the field is kept.
if (lastTranscript.length < transcript.length) {
setValue((text) => text + transcript.slice(lastTranscript.length));
}
// Always record the latest transcript, including when it did not grow
// (a shorter transcript simply becomes the new baseline).
setLastTranscript(transcript);
// NOTE(review): `lastTranscript` is read above but is not listed in the
// dependency array; the effect relies on re-running only on transcript
// changes.
}, [transcript]);

const submit = useCallback(() => {
if (value === '' || disabled) {
return;
Expand Down Expand Up @@ -169,6 +191,28 @@ const Input = ({
<TuneIcon />
</IconButton>
)}
{showTextToSpeech &&
// Voice input toggle: a stop button while recording, otherwise a
// microphone button.
(isRecording ? (
<IconButton
onClick={() => {
// Leave the recording state and stop speech recognition.
setIsRecording(false);
SpeechRecognition.stopListening();
}}
>
<StopCircleIcon />
</IconButton>
) : (
<IconButton
onClick={() => {
// Enter the recording state and start speech recognition.
setIsRecording(true);
// `continuous: true` presumably keeps recognition running until
// stopListening() is called — confirm against the
// react-speech-recognition docs.
SpeechRecognition.startListening({
continuous: true
});
}}
>
<KeyboardVoiceIcon />
</IconButton>
))}
<UploadButton
disabled={disabled}
fileSpec={fileSpec}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/state/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export interface IProjectSettings {
};
features: {
multi_modal?: boolean;
speech_to_text?: boolean;
};
userEnv: string[];
dataPersistence: boolean;
Expand Down

0 comments on commit 62192d9

Please sign in to comment.