A React-based Speech-to-Text and Text-to-Speech library with support for multiple engines.
- Support for multiple STT (Speech-to-Text) engines:
  - Web Speech API - Speech Recognition (Browser built-in) ✅
  - Azure Speech SDK (Coming soon)
  - Google Cloud V2 (Coming soon)
  - Return Zero (Coming soon)
- Support for multiple TTS (Text-to-Speech) engines:
  - Web Speech API - Speech Synthesis (Browser built-in) ✅
  - Google Cloud (Coming soon)
  - Naver Clova (Coming soon)
- React hooks for easy integration
- TypeScript support
- Tree-shakeable
npm install react-stt-tts
# or
yarn add react-stt-tts
# or
pnpm add react-stt-tts
Samples - StackBlitz
import { VoiceProvider, useSTT, useTTS } from "react-stt-tts";
// STT Configuration
// `as const` keeps `model` typed as the literal "web-speech" instead of widening
// it to `string`, so the object type-checks against the `model` union of STTConfig.
const sttConfig = {
  model: "web-speech",
  language: "ko-KR", // Korean
  continuous: true, // Continuous recognition
  interimResults: true, // Show interim results
} as const;

// TTS Configuration
// `as const` is used for the same reason: `model` must stay a literal to match
// the `model` union of TTSConfig.
const ttsConfig = {
  model: "web-speech",
  language: "ko-KR", // Korean
  rate: 1, // Speaking rate
  pitch: 1, // Pitch
  volume: 1, // Volume
} as const;
/**
 * Application root: wraps the component tree in VoiceProvider and hands it the
 * initial STT and TTS engine configurations.
 */
function App() {
  return (
    <VoiceProvider sttConfig={sttConfig} ttsConfig={ttsConfig}>
      <YourComponent />
    </VoiceProvider>
  );
}
/**
 * Example component with start/stop buttons for speech recognition (STT) and
 * speech synthesis (TTS). It calls the useSTT and useTTS hooks, so it is
 * rendered inside VoiceProvider.
 */
function YourComponent() {
  // Using STT hook
  const { start: startSTT, stop: stopSTT, isListening } = useSTT();
  // Using TTS hook
  const { start: startTTS, stop: stopTTS, isSpeaking } = useTTS();

  return (
    <div>
      {/* STT Controls */}
      {/* Call start with an STTStartOptions object; passing the handler directly
          would hand the click event to start() as its options. */}
      <button
        onClick={() =>
          startSTT({
            // Recognition results are delivered through callbacks.
            onRecognized: (result) => console.log(result.text),
          })
        }
        disabled={isListening}
      >
        Start Speech Recognition
      </button>
      <button onClick={stopSTT} disabled={!isListening}>
        Stop Speech Recognition
      </button>
      {/* <div>Recognition Result: {result?.text}</div> */}
      {/* TTS Controls */}
      {/* The text to speak is the `text` field of TTSStartOptions. */}
      <button onClick={() => startTTS({ text: "Hello!" })} disabled={isSpeaking}>
        Speak
      </button>
      <button onClick={stopTTS} disabled={!isSpeaking}>
        Stop
      </button>
    </div>
  );
}
Prop | Type | Description |
---|---|---|
sttConfig | STTConfig | STT initial engine configuration |
ttsConfig | TTSConfig | TTS initial engine configuration |
children | ReactNode | React child components |
/** Configuration for the speech-to-text engine (passed as `sttConfig`). */
interface STTConfig {
  /** Which STT engine to use. */
  model:
    | "web-speech"
    | "azure-speech-sdk"
    | "google-cloud-v2"
    | "return-zero";

  // Credentials / endpoint settings.
  // NOTE(review): presumably only needed by the cloud engines - confirm per engine.
  apiKey?: string;
  token?: string;
  region?: string;

  /** Recognition language tag, e.g. "ko-KR". */
  language?: string;
  /** Continuous recognition. */
  continuous?: boolean;
  /** Show interim results. */
  interimResults?: boolean;
  /** NOTE(review): presumably the maximum number of alternatives per result - confirm. */
  maxAlternatives?: number;
}
/** Configuration for the text-to-speech engine (passed as `ttsConfig`). */
interface TTSConfig {
  /** Which TTS engine to use. */
  model:
    | "web-speech"
    | "google-cloud"
    | "naver-clova";

  /** NOTE(review): presumably only needed by the cloud engines - confirm. */
  apiKey?: string;

  /** NOTE(review): voice identifier; accepted values are presumably engine-specific - confirm. */
  voice?: string;
  /** Synthesis language tag, e.g. "ko-KR". */
  language?: string;

  /** Speaking rate. */
  rate?: number;
  /** Pitch. */
  pitch?: number;
  /** Volume. */
  volume?: number;
}
// useVoiceConfig: exposes both the STT and the TTS configuration, together
// with the functions that update them, from a single hook.
const {
sttConfig, // Current STT configuration
ttsConfig, // Current TTS configuration
setSTTConfig, // Function to update STT configuration
setTTSConfig, // Function to update TTS configuration
} = useVoiceConfig();
// useSTTConfig: exposes only the STT configuration and its update function.
const {
sttConfig, // Current STT configuration
setSTTConfig, // Function to update STT configuration
} = useSTTConfig();
// useTTSConfig: exposes only the TTS configuration and its update function.
const {
ttsConfig, // Current TTS configuration
setTTSConfig, // Function to update TTS configuration
} = useTTSConfig();
// useSTT: controls speech recognition.
// NOTE(review): the options accepted by `start` are presumably STTStartOptions - confirm.
const {
start, // Start speech recognition with options
stop, // Stop speech recognition
isListening, // Whether listening
} = useSTT();
/**
 * Optional callbacks that can be supplied when starting speech recognition.
 * NOTE(review): the exact moment each callback fires is not shown here; the
 * notes below are inferred from the names - confirm against the engine code.
 */
interface STTStartOptions {
  /** Receives a MediaStream, or null. */
  onMediaStream?: (stream: MediaStream | null) => void;
  /** Presumably called once microphone permission has been handled. */
  onAfterMicPermission?: () => void;

  /** Receives an STTResult; presumably an in-progress (interim) result. */
  onRecognizing?: (result: STTResult) => void;
  /** Receives an STTResult; presumably a completed result. */
  onRecognized?: (result: STTResult) => void;

  /** Receives the cancellation reason as a string. */
  onCancelled?: (reason: string) => void;
  /** Presumably called when the recognition session has stopped. */
  onSessionStopped?: () => void;
}
/** Result object handed to the STT recognition callbacks. */
interface STTResult {
  /** Recognized text. */
  text: string;
  /** NOTE(review): confidence score; value range not shown here - confirm. */
  confidence?: number;
  /** NOTE(review): presumably alternative transcriptions - confirm. */
  alternatives?: string[];
}
// useTTS: controls speech synthesis.
// NOTE(review): the options accepted by `start` are presumably TTSStartOptions - confirm.
const {
start, // Speak text with options
stop, // Stop speaking
isSpeaking, // Whether speaking
} = useTTS();
/**
 * Options supplied when starting speech synthesis.
 * NOTE(review): callback timing is inferred from the names - confirm against
 * the engine code.
 */
interface TTSStartOptions {
  /** The text to speak (required). */
  text: string;

  /** Presumably called when audio playback starts. */
  onAudioStarted?: () => void;
  /** Presumably called when audio playback ends. */
  onAudioEnded?: () => void;
  /** Receives the error as `unknown`; narrow it before use. */
  onError?: (error: unknown) => void;
}
/**
 * Result of a speech synthesis request.
 * NOTE(review): no API listed in this document returns this type - confirm
 * where it is produced.
 */
interface TTSResult {
  /** Audio data as a Blob. */
  audio: Blob;
  /** NOTE(review): audio duration; unit (seconds vs. milliseconds) not shown here - confirm. */
  duration: number;
}
MIT