From 4fa2ef410f2f0ff31de357e981db0b43eabd51b7 Mon Sep 17 00:00:00 2001
From: CanisMinor <i@canisminor.cc>
Date: Sun, 19 Nov 2023 21:43:58 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20support=20TTS=20&=20STT=20(?=
 =?UTF-8?q?#443)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ✨ feat(tts): Add tts and stt basic features

* ✨ feat(tts): Handle error

* 💄 style(tts): Add alert to error handler

* 🐛 fix(tts): Error display

* ♻️ refactor: refactor the openai initial code to the createBizOpenAI

* ♻️ refactor(tts): Refactor header config

* ✨ feat: Add TTS voice preview

* 🐛 fix(tts): Fix header

* 🐛 fix: Fix api

---------

Co-authored-by: Arvin Xu <arvinx@foxmail.com>
---
 .i18nrc.js                                    |   2 +-
 README.md                                     |   4 +
 README.zh-CN.md                               |   4 +
 __mocks__/zustand/traditional.ts              |   8 +-
 locales/en_US/chat.json                       |  12 +-
 locales/en_US/error.json                      |   6 +
 locales/en_US/setting.json                    |  41 ++++-
 locales/ja_JP/chat.json                       |  12 +-
 locales/ja_JP/error.json                      |   6 +
 locales/ja_JP/setting.json                    |  41 ++++-
 locales/ko_KR/chat.json                       |  12 +-
 locales/ko_KR/error.json                      |   6 +
 locales/ko_KR/setting.json                    |  41 ++++-
 locales/ru_RU/chat.json                       |  12 +-
 locales/ru_RU/error.json                      |   6 +
 locales/ru_RU/setting.json                    |  41 ++++-
 locales/zh_CN/chat.json                       |  11 +-
 locales/zh_CN/error.json                      |   6 +
 locales/zh_CN/setting.json                    |  37 ++++-
 locales/zh_TW/chat.json                       |  12 +-
 locales/zh_TW/error.json                      |   6 +
 locales/zh_TW/setting.json                    |  41 ++++-
 next.config.mjs                               |  22 +--
 package.json                                  |   1 +
 src/app/api/openai/chat/route.ts              |  46 +-----
 .../createAzureOpenai.ts                      |   2 +-
 .../{ => createBizOpenAI}/createOpenai.ts     |   2 +-
 src/app/api/openai/createBizOpenAI/index.ts   |  46 ++++++
 src/app/api/openai/models/route.ts            |   2 +-
 src/app/api/openai/stt/route.ts               |  29 ++++
 src/app/api/openai/tts/route.ts               |  17 ++
 src/app/api/tts/edge-speech/route.ts          |   9 ++
 src/app/api/tts/microsoft-speech/route.ts     |   9 ++
 .../chat/(desktop)/features/ChatHeader.tsx    |   5 +-
 .../(desktop)/features/ChatInput/Footer.tsx   |  62 ++++++++
 .../(desktop)/features/ChatInput/index.tsx    |   1 -
 .../chat/(desktop)/features/SessionHeader.tsx |   4 +-
 src/app/chat/(mobile)/features/ChatHeader.tsx |  40 +++--
 .../(mobile)/features/ChatInput/Mobile.tsx    |  24 ++-
 .../features/ChatInput/style.mobile.ts        |  19 ---
 .../features/ChatHeader/ShareButton/index.tsx |  38 +++--
 .../features/ChatInput/ActionBar/config.ts    |  22 ++-
 .../features/ChatInput/ActionBar/index.tsx    |  56 +++++--
 src/app/chat/features/ChatInput/STT/index.tsx | 146 ++++++++++++++++++
 .../chat/features/ChatInput/Topic/index.tsx   |   4 +-
 .../ChatList/Actions/Assistant.tsx            |   4 +-
 .../Conversation/ChatList/Actions/User.tsx    |   4 +-
 .../ChatList/Actions/customAction.ts          |  11 +-
 .../Conversation/ChatList/Actions/index.ts    |   9 +-
 .../ChatList/Extras/Assistant.tsx             |  60 +++----
 .../ChatList/Extras/AudioPlayer.tsx           | 139 +++++++++++++++++
 .../ChatList/Extras/ExtraContainer.tsx        |  13 ++
 .../Conversation/ChatList/Extras/TTS.tsx      | 122 +++++++++++++++
 .../Conversation/ChatList/Extras/User.tsx     |  29 ++--
 .../chat/settings/features/HeaderContent.tsx  |  10 +-
 .../features/SubmitAgentButton/index.tsx      |   5 +-
 src/app/settings/features/SideBar/List.tsx    |   3 +-
 src/app/settings/features/SideBar/index.tsx   |   2 +-
 src/app/settings/features/UpgradeAlert.tsx    |   2 +-
 src/app/settings/tts/TTS/index.tsx            |  73 +++++++++
 src/app/settings/tts/TTS/options.ts           |  30 ++++
 src/app/settings/tts/index.tsx                |  21 +++
 src/app/settings/tts/layout.tsx               |   1 +
 src/app/settings/tts/page.tsx                 |   3 +
 src/components/HotKeys/index.tsx              |  11 +-
 src/const/layoutTokens.ts                     |   3 +
 src/const/settings.ts                         |  23 ++-
 .../AgentTTS/SelectWithTTSPreview.tsx         | 119 ++++++++++++++
 src/features/AgentSetting/AgentTTS/index.tsx  | 115 ++++++++++++++
 src/features/AgentSetting/AgentTTS/options.ts |  16 ++
 src/features/AgentSetting/index.tsx           |   3 +-
 src/hooks/useSTT.ts                           |  61 ++++++++
 src/hooks/useTTS.ts                           |  87 +++++++++++
 src/locales/default/chat.ts                   |  11 +-
 src/locales/default/error.ts                  |   6 +
 src/locales/default/setting.ts                |  36 ++++-
 src/services/_url.ts                          |   7 +
 src/store/global/initialState.ts              |   1 +
 src/store/global/selectors/settings.test.ts   |  32 ++++
 src/store/global/selectors/settings.ts        |  10 +-
 .../session/slices/agentConfig/selectors.ts   |   9 ++
 .../session/slices/chat/actions/translate.ts  |  25 ++-
 .../slices/chat/selectors/utils.test.ts       |  16 ++
 .../slices/session/reducers/session.test.ts   |  24 +++
 src/types/chatMessage.ts                      |   6 +
 src/types/session.ts                          |  17 ++
 src/types/settings.ts                         |  15 +-
 87 files changed, 1937 insertions(+), 230 deletions(-)
 rename src/app/api/openai/{ => createBizOpenAI}/createAzureOpenai.ts (97%)
 rename src/app/api/openai/{ => createBizOpenAI}/createOpenai.ts (95%)
 create mode 100644 src/app/api/openai/createBizOpenAI/index.ts
 create mode 100644 src/app/api/openai/stt/route.ts
 create mode 100644 src/app/api/openai/tts/route.ts
 create mode 100644 src/app/api/tts/edge-speech/route.ts
 create mode 100644 src/app/api/tts/microsoft-speech/route.ts
 create mode 100644 src/app/chat/(desktop)/features/ChatInput/Footer.tsx
 delete mode 100644 src/app/chat/(mobile)/features/ChatInput/style.mobile.ts
 create mode 100644 src/app/chat/features/ChatInput/STT/index.tsx
 create mode 100644 src/app/chat/features/Conversation/ChatList/Extras/AudioPlayer.tsx
 create mode 100644 src/app/chat/features/Conversation/ChatList/Extras/ExtraContainer.tsx
 create mode 100644 src/app/chat/features/Conversation/ChatList/Extras/TTS.tsx
 create mode 100644 src/app/settings/tts/TTS/index.tsx
 create mode 100644 src/app/settings/tts/TTS/options.ts
 create mode 100644 src/app/settings/tts/index.tsx
 create mode 100644 src/app/settings/tts/layout.tsx
 create mode 100644 src/app/settings/tts/page.tsx
 create mode 100644 src/features/AgentSetting/AgentTTS/SelectWithTTSPreview.tsx
 create mode 100644 src/features/AgentSetting/AgentTTS/index.tsx
 create mode 100644 src/features/AgentSetting/AgentTTS/options.ts
 create mode 100644 src/hooks/useSTT.ts
 create mode 100644 src/hooks/useTTS.ts

diff --git a/.i18nrc.js b/.i18nrc.js
index 76886685dfcc..9fb8984c03df 100644
--- a/.i18nrc.js
+++ b/.i18nrc.js
@@ -8,5 +8,5 @@ module.exports = defineConfig({
   output: 'locales',
   outputLocales: ['zh_TW', 'en_US', 'ru_RU', 'ja_JP', 'ko_KR'],
   temperature: 0,
-  modelName: 'gpt-3.5-turbo',
+  modelName: 'gpt-3.5-turbo-1106',
 });
diff --git a/README.md b/README.md
index 0fef80ee6b51..1b66cc6b1b89 100644
--- a/README.md
+++ b/README.md
@@ -317,6 +317,7 @@ This project provides some additional configuration items set with environment v
 | NPM                             | Repository                            | Description                                                                                                             | Version                                 |
 | ------------------------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | --------------------------------------- |
 | [@lobehub/ui][lobe-ui-link]     | [lobehub/lobe-ui][lobe-ui-github]     | Lobe UI is an open-source UI component library dedicated to building AIGC web applications.                             | [![][lobe-ui-shield]][lobe-ui-link]     |
+| [@lobehub/tts][lobe-tts-link]   | [lobehub/lobe-tts][lobe-tts-github]   | Lobe TTS is a high-quality & reliable TTS/STT React Hooks library                                                       | [![][lobe-tts-shield]][lobe-tts-link]   |
 | [@lobehub/lint][lobe-lint-link] | [lobehub/lobe-lint][lobe-lint-github] | LobeLint provides configurations for ESlint, Stylelint, Commitlint, Prettier, Remark, and Semantic Release for LobeHub. | [![][lobe-lint-shield]][lobe-lint-link] |
 | @lobehub/assets                 | [lobehub/assets][lobe-assets-github]  | Logo assets, favicons, webfonts for LobeHub.                                                                            |                                         |
 
@@ -483,6 +484,9 @@ This project is [MIT](./LICENSE) licensed.
 [lobe-lint-link]: https://www.npmjs.com/package/@lobehub/lint
 [lobe-lint-shield]: https://img.shields.io/npm/v/@lobehub/lint?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
 [lobe-theme]: https://github.com/lobehub/sd-webui-lobe-theme
+[lobe-tts-github]: https://github.com/lobehub/lobe-tts
+[lobe-tts-link]: https://www.npmjs.com/package/@lobehub/tts
+[lobe-tts-shield]: https://img.shields.io/npm/v/@lobehub/tts?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
 [lobe-ui-github]: https://github.com/lobehub/lobe-ui
 [lobe-ui-link]: https://www.npmjs.com/package/@lobehub/ui
 [lobe-ui-shield]: https://img.shields.io/npm/v/@lobehub/ui?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 9e60712ad4ba..08f5adffd8e6 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -317,6 +317,7 @@ $ docker run -d -p 3210:3210 \
 | NPM                             | 仓库                                  | 描述                                                                                                  | 版本                                    |
 | ------------------------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------- |
 | [@lobehub/ui][lobe-ui-link]     | [lobehub/lobe-ui][lobe-ui-github]     | Lobe UI 是一个专为构建 AIGC 网页应用程序而设计的开源 UI 组件库。                                      | [![][lobe-ui-shield]][lobe-ui-link]     |
+| [@lobehub/tts][lobe-tts-link]   | [lobehub/lobe-tts][lobe-tts-github]   | Lobe TTS 是一个专为 TTS/STT 建设的语音合成 / 识别 React Hooks 库                                      | [![][lobe-tts-shield]][lobe-tts-link]   |
 | [@lobehub/lint][lobe-lint-link] | [lobehub/lobe-lint][lobe-lint-github] | LobeLint 为 LobeHub 提供 ESlint，Stylelint，Commitlint，Prettier，Remark 和 Semantic Release 的配置。 | [![][lobe-lint-shield]][lobe-lint-link] |
 | @lobehub/assets                 | [lobehub/assets][lobe-assets-github]  | LobeHub 的 Logo 资源、favicon、网页字体。                                                             |                                         |
 
@@ -483,6 +484,9 @@ This project is [MIT](./LICENSE) licensed.
 [lobe-lint-link]: https://www.npmjs.com/package/@lobehub/lint
 [lobe-lint-shield]: https://img.shields.io/npm/v/@lobehub/lint?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
 [lobe-theme]: https://github.com/lobehub/sd-webui-lobe-theme
+[lobe-tts-github]: https://github.com/lobehub/lobe-tts
+[lobe-tts-link]: https://www.npmjs.com/package/@lobehub/tts
+[lobe-tts-shield]: https://img.shields.io/npm/v/@lobehub/tts?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
 [lobe-ui-github]: https://github.com/lobehub/lobe-ui
 [lobe-ui-link]: https://www.npmjs.com/package/@lobehub/ui
 [lobe-ui-shield]: https://img.shields.io/npm/v/@lobehub/ui?color=369eff&labelColor=black&logo=npm&logoColor=white&style=flat-square
diff --git a/__mocks__/zustand/traditional.ts b/__mocks__/zustand/traditional.ts
index d1f55af04338..262acd02a776 100644
--- a/__mocks__/zustand/traditional.ts
+++ b/__mocks__/zustand/traditional.ts
@@ -15,11 +15,11 @@ const createImpl = (createState: any) => {
 
 // Reset all stores after each test run
 beforeEach(() => {
-  act(() =>
-    { for (const resetFn of storeResetFns) {
+  act(() => {
+    for (const resetFn of storeResetFns) {
       resetFn();
-    } },
-  );
+    }
+  });
 });
 
 export const createWithEqualityFn = (f: any) => (f === undefined ? createImpl : createImpl(f));
diff --git a/locales/en_US/chat.json b/locales/en_US/chat.json
index 42eef5884ef5..cf14c7947ea3 100644
--- a/locales/en_US/chat.json
+++ b/locales/en_US/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "Include Agent Role Setting"
   },
   "stop": "Stop",
+  "stt": {
+    "action": "Voice Input",
+    "loading": "Recognizing...",
+    "prettifying": "Prettifying..."
+  },
   "temp": "Temporary",
   "tokenDetail": "Role Setting: {{systemRoleToken}} · Chat History: {{chatsToken}}",
   "tokenTag": {
@@ -57,9 +62,14 @@
     "openNewTopic": "Open a new topic"
   },
   "translate": {
-    "clear": "Clear Translation"
+    "clear": "Clear Translation",
+    "action": "Translate"
   },
   "translateTo": "Translate",
+  "tts": {
+    "action": "Text to Speech",
+    "clear": "Clear Speech"
+  },
   "updateAgent": "Update Agent Information",
   "upload": {
     "actionTooltip": "Upload Image",
diff --git a/locales/en_US/error.json b/locales/en_US/error.json
index 58110c6fbd16..da53f0f8f8de 100644
--- a/locales/en_US/error.json
+++ b/locales/en_US/error.json
@@ -28,6 +28,12 @@
     "PluginServerError": "Plugin server request returned an error. Please check your plugin manifest file, plugin configuration, or server implementation based on the error information below",
     "NoAPIKey": "OpenAI API Key is empty, please add a custom OpenAI API Key"
   },
+  "stt": {
+    "responseError": "Service request failed, please check the configuration or try again"
+  },
+  "tts": {
+    "responseError": "Service request failed, please check the configuration or try again"
+  },
   "unlock": {
     "apikey": {
       "title": "Use Custom API Key",
diff --git a/locales/en_US/setting.json b/locales/en_US/setting.json
index 04c340ba6035..a0006a0b74a7 100644
--- a/locales/en_US/setting.json
+++ b/locales/en_US/setting.json
@@ -207,6 +207,44 @@
     },
     "title": "System Settings"
   },
+  "settingTTS": {
+    "showAllLocaleVoice": {
+      "desc": "If disabled, only voices for the current language will be displayed",
+      "title": "Show all locale voices"
+    },
+    "sttService": {
+      "desc": "The 'browser' option refers to the native speech recognition service in the browser",
+      "title": "Speech-to-Text Service"
+    },
+    "title": "Speech Services",
+    "ttsService": {
+      "desc": "If using the OpenAI text-to-speech service, ensure that the OpenAI model service is enabled",
+      "title": "Text-to-Speech Service"
+    },
+    "voice": {
+      "title": "Text-to-Speech Voices",
+      "desc": "Select a voice for the current assistant, different TTS services support different voices",
+      "preview": "Preview Voice"
+    },
+    "openai": {
+      "sttModel": "OpenAI Speech Recognition Model",
+      "ttsModel": "OpenAI Text-to-Speech Model"
+    },
+    "stt": "Speech Recognition Settings",
+    "sttLocale": {
+      "desc": "The language of the speech input, this option can improve the accuracy of speech recognition",
+      "title": "Speech Recognition Language"
+    },
+    "sttPersisted": {
+      "desc": "When enabled, speech recognition will not automatically end and requires manual click on the end button",
+      "title": "Manually End Speech Recognition"
+    },
+    "tts": "Text-to-Speech Settings",
+    "sttAutoStop": {
+      "desc": "When disabled, speech recognition will not automatically stop and will require manual intervention to end the process.",
+      "title": "Automatic Speech Recognition Termination"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "Avatar"
@@ -245,6 +283,7 @@
   "tab": {
     "agent": "Default Agent",
     "common": "Common Settings",
-    "llm": "Custom LLM API"
+    "llm": "Custom LLM API",
+    "tts": "Speech Services"
   }
 }
diff --git a/locales/ja_JP/chat.json b/locales/ja_JP/chat.json
index 9a029a2e2737..9db940879dc8 100644
--- a/locales/ja_JP/chat.json
+++ b/locales/ja_JP/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "エージェントの役割設定を含む"
   },
   "stop": "停止",
+  "stt": {
+    "action": "音声入力",
+    "loading": "読み取り中...",
+    "prettifying": "美化中..."
+  },
   "temp": "一時",
   "tokenDetail": "役割設定: {{systemRoleToken}} · チャット履歴: {{chatsToken}}",
   "tokenTag": {
@@ -57,9 +62,14 @@
     "openNewTopic": "新しいトピックを開く"
   },
   "translate": {
-    "clear": "翻訳をクリア"
+    "clear": "翻訳をクリア",
+    "action": "翻訳"
   },
   "translateTo": "翻訳",
+  "tts": {
+    "action": "音声読み上げ",
+    "clear": "音声削除"
+  },
   "updateAgent": "エージェント情報を更新",
   "upload": {
     "actionTooltip": "画像をアップロード",
diff --git a/locales/ja_JP/error.json b/locales/ja_JP/error.json
index cfba29408178..b667be088453 100644
--- a/locales/ja_JP/error.json
+++ b/locales/ja_JP/error.json
@@ -28,6 +28,12 @@
     "PluginApiNotFound": "申し訳ありませんが、プラグインのマニフェストに指定されたAPIが見つかりませんでした。リクエストメソッドとプラグインのマニフェストのAPIが一致しているかどうかを確認してください",
     "NoAPIKey": "OpenAI APIキーが空です。カスタムOpenAI APIキーを追加してください。"
   },
+  "stt": {
+    "responseError": "サービスリクエストが失敗しました。設定を確認するか、もう一度お試しください"
+  },
+  "tts": {
+    "responseError": "サービスリクエストが失敗しました。設定を確認するか、もう一度お試しください"
+  },
   "unlock": {
     "apikey": {
       "title": "カスタムAPIキーの使用",
diff --git a/locales/ja_JP/setting.json b/locales/ja_JP/setting.json
index ceca7990abd6..23e77976088f 100644
--- a/locales/ja_JP/setting.json
+++ b/locales/ja_JP/setting.json
@@ -194,6 +194,44 @@
     },
     "title": "システム設定"
   },
+  "settingTTS": {
+    "showAllLocaleVoice": {
+      "desc": "オフにすると、現在の言語の音声のみが表示されます",
+      "title": "すべての言語の音声を表示"
+    },
+    "sttService": {
+      "desc": "browser はブラウザのネイティブ音声認識サービスです",
+      "title": "音声認識サービス"
+    },
+    "title": "音声サービス",
+    "ttsService": {
+      "desc": "OpenAIの音声合成サービスを使用する場合、OpenAIモデルサービスが有効になっていることを確認する必要があります",
+      "title": "音声合成サービス"
+    },
+    "voice": {
+      "title": "音声合成音声源",
+      "desc": "現在のアシスタントに適した音声を選択します。異なるTTSサービスは異なる音声をサポートしています",
+      "preview": "プレビュー"
+    },
+    "openai": {
+      "sttModel": "OpenAI 音声認識モデル",
+      "ttsModel": "OpenAI 音声合成モデル"
+    },
+    "stt": "音声認識設定",
+    "sttLocale": {
+      "desc": "音声入力の言語、このオプションを選択すると音声認識の精度が向上します",
+      "title": "音声認識言語"
+    },
+    "sttPersisted": {
+      "desc": "有効にすると、音声認識が自動的に終了せず、手動で終了ボタンをクリックする必要があります",
+      "title": "音声認識の手動終了"
+    },
+    "tts": "音声合成設定",
+    "sttAutoStop": {
+      "desc": "オフにすると、音声認識が自動的に終了せず、手動で終了ボタンをクリックする必要があります",
+      "title": "音声認識の自動終了"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "アバター"
@@ -232,6 +270,7 @@
   "tab": {
     "agent": "デフォルトのアシスタント",
     "common": "一般設定",
-    "llm": "言語モデル"
+    "llm": "言語モデル",
+    "tts": "音声サービス"
   }
 }
diff --git a/locales/ko_KR/chat.json b/locales/ko_KR/chat.json
index 28476214cb34..9c960285c609 100644
--- a/locales/ko_KR/chat.json
+++ b/locales/ko_KR/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "도우미 역할 포함"
   },
   "stop": "정지",
+  "stt": {
+    "action": "음성 입력",
+    "loading": "인식 중...",
+    "prettifying": "미화 중..."
+  },
   "temp": "임시",
   "tokenDetail": "역할 설정: {{systemRoleToken}} · 대화 기록: {{chatsToken}}",
   "tokenTag": {
@@ -57,9 +62,14 @@
     "openNewTopic": "새로운 주제 열기"
   },
   "translate": {
-    "clear": "번역 지우기"
+    "clear": "번역 지우기",
+    "action": "번역"
   },
   "translateTo": "번역",
+  "tts": {
+    "action": "음성 읽기",
+    "clear": "음성 삭제"
+  },
   "updateAgent": "도우미 정보 업데이트",
   "upload": {
     "actionTooltip": "이미지 업로드",
diff --git a/locales/ko_KR/error.json b/locales/ko_KR/error.json
index 6caaafa92af5..b934e1f3c955 100644
--- a/locales/ko_KR/error.json
+++ b/locales/ko_KR/error.json
@@ -28,6 +28,12 @@
     "OpenAIBizError": "OpenAI 서비스 요청 중 오류가 발생했습니다. 아래 정보를 확인하고 문제를 해결하거나 다시 시도해주세요.",
     "NoAPIKey": "OpenAI API 키가 비어 있습니다. 사용자 정의 OpenAI API 키를 추가해주세요."
   },
+  "stt": {
+    "responseError": "서비스 요청이 실패했습니다. 구성을 확인하거나 다시 시도해주세요."
+  },
+  "tts": {
+    "responseError": "서비스 요청이 실패했습니다. 구성을 확인하거나 다시 시도해주세요."
+  },
   "unlock": {
     "apikey": {
       "title": "사용자 정의 API 키 사용",
diff --git a/locales/ko_KR/setting.json b/locales/ko_KR/setting.json
index 1e217133db51..6e57e61e033c 100644
--- a/locales/ko_KR/setting.json
+++ b/locales/ko_KR/setting.json
@@ -194,6 +194,44 @@
     },
     "title": "시스템 설정"
   },
+  "settingTTS": {
+    "showAllLocaleVoice": {
+      "desc": "현재 언어의 음성만 표시하려면 닫으십시오",
+      "title": "모든 언어 음성 표시"
+    },
+    "sttService": {
+      "desc": "browser는 브라우저의 기본 음성 인식 서비스입니다",
+      "title": "음성 인식 서비스"
+    },
+    "title": "음성 서비스",
+    "ttsService": {
+      "desc": "OpenAI 음성 합성 서비스를 사용하는 경우 OpenAI 모델 서비스가 열려 있어야 합니다",
+      "title": "음성 합성 서비스"
+    },
+    "voice": {
+      "title": "음성 합성 음성",
+      "desc": "현재 어시스턴트에 대한 음성을 선택하십시오. 각기 다른 TTS 서비스는 다른 음성을 지원합니다.",
+      "preview": "프리뷰 음성"
+    },
+    "openai": {
+      "sttModel": "OpenAI 음성 인식 모델",
+      "ttsModel": "OpenAI 음성 합성 모델"
+    },
+    "stt": "음성 인식 설정",
+    "sttLocale": {
+      "desc": "음성 입력의 언어, 이 옵션을 통해 음성 인식 정확도를 높일 수 있습니다.",
+      "title": "음성 인식 언어"
+    },
+    "sttPersisted": {
+      "desc": "활성화하면 음성 인식이 자동으로 종료되지 않고, 수동으로 종료 버튼을 클릭해야 합니다.",
+      "title": "음성 인식 수동 종료"
+    },
+    "tts": "음성 합성 설정",
+    "sttAutoStop": {
+      "desc": "비활성화하면 음성 인식이 자동으로 종료되지 않으며, 수동으로 종료 버튼을 클릭해야 합니다.",
+      "title": "음성 인식 자동 종료"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "아바타"
@@ -232,6 +270,7 @@
   "tab": {
     "agent": "기본 도우미",
     "common": "일반 설정",
-    "llm": "언어 모델"
+    "llm": "언어 모델",
+    "tts": "음성 서비스"
   }
 }
diff --git a/locales/ru_RU/chat.json b/locales/ru_RU/chat.json
index 515eb83cdda9..b6196bb49d26 100644
--- a/locales/ru_RU/chat.json
+++ b/locales/ru_RU/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "С указанием роли помощника"
   },
   "stop": "Остановить",
+  "stt": {
+    "action": "Голосовой ввод",
+    "loading": "Идет распознавание...",
+    "prettifying": "Форматирование..."
+  },
   "temp": "Временный",
   "tokenDetail": "Роль помощника: {{systemRoleToken}} · История сообщений: {{chatsToken}}",
   "tokenTag": {
@@ -57,9 +62,14 @@
     "openNewTopic": "Открыть новую тему"
   },
   "translate": {
-    "clear": "Очистить перевод"
+    "clear": "Очистить перевод",
+    "action": "Перевести"
   },
   "translateTo": "Перевести на",
+  "tts": {
+    "action": "Воспроизвести речь",
+    "clear": "Очистить речь"
+  },
   "updateAgent": "Обновить информацию о помощнике",
   "upload": {
     "actionTooltip": "Загрузить изображение",
diff --git a/locales/ru_RU/error.json b/locales/ru_RU/error.json
index a6afeb1ec27a..b2e9c19e9864 100644
--- a/locales/ru_RU/error.json
+++ b/locales/ru_RU/error.json
@@ -28,6 +28,12 @@
     "PluginServerError": "Запрос сервера плагина возвратил ошибку. Проверьте файл манифеста плагина, конфигурацию плагина или реализацию сервера на основе информации об ошибке ниже",
     "NoAPIKey": "Ключ OpenAI API пуст, пожалуйста, добавьте свой собственный ключ OpenAI API"
   },
+  "stt": {
+    "responseError": "Ошибка запроса сервиса. Пожалуйста, проверьте конфигурацию или повторите попытку"
+  },
+  "tts": {
+    "responseError": "Ошибка запроса сервиса. Пожалуйста, проверьте конфигурацию или повторите попытку"
+  },
   "unlock": {
     "apikey": {
       "title": "Использовать собственный ключ API",
diff --git a/locales/ru_RU/setting.json b/locales/ru_RU/setting.json
index 2d9dc2add180..66a714604b42 100644
--- a/locales/ru_RU/setting.json
+++ b/locales/ru_RU/setting.json
@@ -207,6 +207,44 @@
     },
     "title": "Настройки системы"
   },
+  "settingTTS": {
+    "showAllLocaleVoice": {
+      "desc": "Если отключено, отображаются только голоса текущего языка",
+      "title": "Показать все голоса локали"
+    },
+    "sttService": {
+      "desc": "где browser - это встроенная в браузер служба распознавания речи",
+      "title": "Служба распознавания речи"
+    },
+    "title": "Служба речи",
+    "ttsService": {
+      "desc": "Если используется услуга синтеза речи OpenAI, убедитесь, что услуга модели OpenAI включена",
+      "title": "Служба синтеза речи"
+    },
+    "voice": {
+      "title": "Голосовой синтез",
+      "desc": "Выберите голос для текущего помощника, различные службы TTS поддерживают разные источники звука",
+      "preview": "Предварительный просмотр голоса"
+    },
+    "openai": {
+      "sttModel": "Модель распознавания речи OpenAI",
+      "ttsModel": "Модель синтеза речи OpenAI"
+    },
+    "stt": "Настройки распознавания речи",
+    "sttLocale": {
+      "desc": "Язык речи для ввода речи, этот параметр может повысить точность распознавания речи",
+      "title": "Язык распознавания речи"
+    },
+    "sttPersisted": {
+      "desc": "При включении распознавание речи не будет автоматически завершаться, необходимо вручную нажать кнопку завершения",
+      "title": "Вручную завершить распознавание речи"
+    },
+    "tts": "Настройки синтеза речи",
+    "sttAutoStop": {
+      "desc": "После отключения распознавания речи оно не будет автоматически останавливаться, вам нужно будет вручную нажать кнопку завершения",
+      "title": "Автоматическое завершение распознавания речи"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "Аватар"
@@ -245,6 +283,7 @@
   "tab": {
     "agent": "Помощник по умолчанию",
     "common": "Общие настройки",
-    "llm": "Пользовательский API GPT"
+    "llm": "Пользовательский API GPT",
+    "tts": "Сервис речи"
   }
 }
diff --git a/locales/zh_CN/chat.json b/locales/zh_CN/chat.json
index 16c96ee68fa2..133400a5b1c8 100644
--- a/locales/zh_CN/chat.json
+++ b/locales/zh_CN/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "包含助手角色设定"
   },
   "stop": "停止",
+  "stt": {
+    "action": "语音输入",
+    "loading": "识别中...",
+    "prettifying": "润色中..."
+  },
   "temp": "临时",
   "tokenDetail": "角色设定: {{systemRoleToken}} · 历史消息: {{chatsToken}}",
   "tokenTag": {
@@ -55,9 +60,13 @@
     "title": "话题列表"
   },
   "translate": {
+    "action": "翻译",
     "clear": "删除翻译"
   },
-  "translateTo": "翻译",
+  "tts": {
+    "action": "语音朗读",
+    "clear": "删除语音"
+  },
   "updateAgent": "更新助理信息",
   "upload": {
     "actionTooltip": "上传图片",
diff --git a/locales/zh_CN/error.json b/locales/zh_CN/error.json
index 955ca070064e..cf51ae8e960e 100644
--- a/locales/zh_CN/error.json
+++ b/locales/zh_CN/error.json
@@ -28,6 +28,12 @@
     "OpenAIBizError": "请求 OpenAI 服务出错，请根据以下信息排查或重试",
     "NoAPIKey": "OpenAI API Key 为空，请添加自定义 OpenAI API Key"
   },
+  "stt": {
+    "responseError": "服务请求失败，请检查配置或重试"
+  },
+  "tts": {
+    "responseError": "服务请求失败，请检查配置或重试"
+  },
   "unlock": {
     "apikey": {
       "addProxyUrl": "添加 OpenAI 代理地址（可选）",
diff --git a/locales/zh_CN/setting.json b/locales/zh_CN/setting.json
index ff1d68155cd0..ffb4c62848e9 100644
--- a/locales/zh_CN/setting.json
+++ b/locales/zh_CN/setting.json
@@ -194,6 +194,40 @@
     },
     "title": "系统设置"
   },
+  "settingTTS": {
+    "openai": {
+      "sttModel": "OpenAI 语音识别模型",
+      "ttsModel": "OpenAI 语音合成模型"
+    },
+    "showAllLocaleVoice": {
+      "desc": "关闭则只显示当前语种的声源",
+      "title": "显示所有语种声源"
+    },
+    "stt": "语音识别设置",
+    "sttAutoStop": {
+      "desc": "关闭后，语音识别将不会自动结束，需要手动点击结束按钮",
+      "title": "自动结束语音识别"
+    },
+    "sttLocale": {
+      "desc": "语音输入的语种，此选项可提高语音识别准确率",
+      "title": "语音识别语种"
+    },
+    "sttService": {
+      "desc": "其中 browser 为浏览器原生的语音识别服务",
+      "title": "语音识别服务"
+    },
+    "title": "语音服务",
+    "tts": "语音合成设置",
+    "ttsService": {
+      "desc": "如使用 OpenAI 语音合成服务，需要保证 OpenAI 模型服务已开启",
+      "title": "语音合成服务"
+    },
+    "voice": {
+      "desc": "为当前助手挑选一个声音，不同 TTS 服务支持的声源不同",
+      "preview": "试听声源",
+      "title": "语音合成声源"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "头像"
@@ -232,6 +266,7 @@
   "tab": {
     "agent": "默认助手",
     "common": "通用设置",
-    "llm": "语言模型"
+    "llm": "语言模型",
+    "tts": "语音服务"
   }
 }
diff --git a/locales/zh_TW/chat.json b/locales/zh_TW/chat.json
index 8e6d4366492d..c78e1c0e7aef 100644
--- a/locales/zh_TW/chat.json
+++ b/locales/zh_TW/chat.json
@@ -35,6 +35,11 @@
     "withSystemRole": "包含助手角色設定"
   },
   "stop": "停止",
+  "stt": {
+    "action": "語音輸入",
+    "loading": "辨識中...",
+    "prettifying": "美化中..."
+  },
   "temp": "臨時",
   "tokenDetail": "角色設定: {{systemRoleToken}} · 歷史訊息: {{chatsToken}}",
   "tokenTag": {
@@ -57,9 +62,14 @@
     "openNewTopic": "開啟新話題"
   },
   "translate": {
-    "clear": "刪除翻譯"
+    "clear": "刪除翻譯",
+    "action": "翻譯"
   },
   "translateTo": "翻譯",
+  "tts": {
+    "action": "語音朗讀",
+    "clear": "清除語音"
+  },
   "updateAgent": "更新助理資訊",
   "upload": {
     "actionTooltip": "上傳圖片",
diff --git a/locales/zh_TW/error.json b/locales/zh_TW/error.json
index ac0843cd6010..88ea5d8ef062 100644
--- a/locales/zh_TW/error.json
+++ b/locales/zh_TW/error.json
@@ -28,6 +28,12 @@
     "PluginServerError": "外掛伺服器請求回傳錯誤。請根據下面的錯誤資訊檢查您的外掛描述檔案、外掛設定或伺服器實作",
     "NoAPIKey": "OpenAI API 金鑰為空，請添加自訂 OpenAI API 金鑰"
   },
+  "stt": {
+    "responseError": "服務請求失敗，請檢查配置或重試"
+  },
+  "tts": {
+    "responseError": "服務請求失敗，請檢查配置或重試"
+  },
   "unlock": {
     "apikey": {
       "title": "使用自定義 API Key",
diff --git a/locales/zh_TW/setting.json b/locales/zh_TW/setting.json
index fe30093188a3..8a35d00e054c 100644
--- a/locales/zh_TW/setting.json
+++ b/locales/zh_TW/setting.json
@@ -207,6 +207,44 @@
     },
     "title": "系統設定"
   },
+  "settingTTS": {
+    "showAllLocaleVoice": {
+      "desc": "關閉則只顯示當前語種的聲源",
+      "title": "顯示所有語種聲源"
+    },
+    "sttService": {
+      "desc": "其中 browser 為瀏覽器原生的語音識別服務",
+      "title": "語音識別服務"
+    },
+    "title": "語音服務",
+    "ttsService": {
+      "desc": "如使用 OpenAI 語音合成服務，需要保證 OpenAI 模型服務已開啟",
+      "title": "語音合成服務"
+    },
+    "voice": {
+      "title": "語音合成聲源",
+      "desc": "為當前助手挑選一個聲音，不同 TTS 服務支持的聲源不同",
+      "preview": "預覽"
+    },
+    "openai": {
+      "sttModel": "OpenAI 語音識別模型",
+      "ttsModel": "OpenAI 語音合成模型"
+    },
+    "stt": "語音識別設定",
+    "sttLocale": {
+      "desc": "語音輸入的語言，此選項可提高語音識別準確率",
+      "title": "語音識別語言"
+    },
+    "sttPersisted": {
+      "desc": "開啟後，語音識別將不會自動結束，需要手動點擊結束按鈕",
+      "title": "手動結束語音識別"
+    },
+    "tts": "語音合成設定",
+    "sttAutoStop": {
+      "desc": "關閉後，語音識別將不會自動結束，需要手動點擊結束按鈕",
+      "title": "自動結束語音識別"
+    }
+  },
   "settingTheme": {
     "avatar": {
       "title": "頭像"
@@ -245,6 +283,7 @@
   "tab": {
     "agent": "預設助理",
     "common": "通用設定",
-    "llm": "語言模型"
+    "llm": "語言模型",
+    "tts": "語音服務"
   }
 }
diff --git a/next.config.mjs b/next.config.mjs
index 0c9d64371133..aace97764136 100644
--- a/next.config.mjs
+++ b/next.config.mjs
@@ -40,25 +40,7 @@ const nextConfig = {
     remotePatterns: [
       {
         hostname: 'registry.npmmirror.com',
-        pathname: '/@lobehub/assets-emoji/1.3.0/files/assets/**',
-        port: '',
-        protocol: 'https',
-      },
-      {
-        hostname: 'registry.npmmirror.com',
-        pathname: '/@lobehub/assets-emoji-anim/1.0.0/files/assets/**',
-        port: '',
-        protocol: 'https',
-      },
-      {
-        hostname: 'registry.npmmirror.com',
-        pathname: '/@lobehub/assets-logo/1.1.0/files/assets/**',
-        port: '',
-        protocol: 'https',
-      },
-      {
-        hostname: 'registry.npmmirror.com',
-        pathname: '/@lobehub/assets-favicons/latest/files/assets/**',
+        pathname: '/@lobehub/**',
         port: '',
         protocol: 'https',
       },
@@ -69,7 +51,7 @@ const nextConfig = {
 
   reactStrictMode: true,
 
-  transpilePackages: ['antd-style', '@lobehub/ui'],
+  transpilePackages: ['antd-style', '@lobehub/ui', '@lobehub/tts'],
 
   webpack(config) {
     config.experiments = {
diff --git a/package.json b/package.json
index eaddd5a1d765..5f2ba0388973 100644
--- a/package.json
+++ b/package.json
@@ -70,6 +70,7 @@
     "@icons-pack/react-simple-icons": "^9",
     "@lobehub/chat-plugin-sdk": "latest",
     "@lobehub/chat-plugins-gateway": "latest",
+    "@lobehub/tts": "latest",
     "@lobehub/ui": "latest",
     "@vercel/analytics": "^1",
     "ahooks": "^3",
diff --git a/src/app/api/openai/chat/route.ts b/src/app/api/openai/chat/route.ts
index 050a325a952a..7d38c6e5cd71 100644
--- a/src/app/api/openai/chat/route.ts
+++ b/src/app/api/openai/chat/route.ts
@@ -1,14 +1,6 @@
-import OpenAI from 'openai';
-
-import { getServerConfig } from '@/config/server';
-import { getOpenAIAuthFromRequest } from '@/const/fetch';
-import { ChatErrorType, ErrorType } from '@/types/fetch';
 import { OpenAIChatStreamPayload } from '@/types/openai/chat';
 
-import { checkAuth } from '../../auth';
-import { createAzureOpenai } from '../createAzureOpenai';
-import { createOpenai } from '../createOpenai';
-import { createErrorResponse } from '../errorResponse';
+import { createBizOpenAI } from '../createBizOpenAI';
 import { createChatCompletion } from './createChatCompletion';
 
 export const runtime = 'edge';
@@ -16,38 +8,10 @@ export const runtime = 'edge';
 export const POST = async (req: Request) => {
   const payload = (await req.json()) as OpenAIChatStreamPayload;
 
-  const { apiKey, accessCode, endpoint, useAzure, apiVersion } = getOpenAIAuthFromRequest(req);
-
-  const result = checkAuth({ accessCode, apiKey });
-
-  if (!result.auth) {
-    return createErrorResponse(result.error as ErrorType);
-  }
-
-  let openai: OpenAI;
-
-  const { USE_AZURE_OPENAI } = getServerConfig();
-  const useAzureOpenAI = useAzure || USE_AZURE_OPENAI;
-
-  try {
-    if (useAzureOpenAI) {
-      openai = createAzureOpenai({
-        apiVersion,
-        endpoint,
-        model: payload.model,
-        userApiKey: apiKey,
-      });
-    } else {
-      openai = createOpenai(apiKey, endpoint);
-    }
-  } catch (error) {
-    if ((error as Error).cause === ChatErrorType.NoAPIKey) {
-      return createErrorResponse(ChatErrorType.NoAPIKey);
-    }
+  const openaiOrErrResponse = createBizOpenAI(req, payload.model);
 
-    console.error(error); // log error to trace it
-    return createErrorResponse(ChatErrorType.InternalServerError);
-  }
+  // if openaiOrErrResponse is a Response, it means there is an error, just return it
+  if (openaiOrErrResponse instanceof Response) return openaiOrErrResponse;
 
-  return createChatCompletion({ openai, payload });
+  return createChatCompletion({ openai: openaiOrErrResponse, payload });
 };
diff --git a/src/app/api/openai/createAzureOpenai.ts b/src/app/api/openai/createBizOpenAI/createAzureOpenai.ts
similarity index 97%
rename from src/app/api/openai/createAzureOpenai.ts
rename to src/app/api/openai/createBizOpenAI/createAzureOpenai.ts
index 178f0fd40f17..0905f0818e1e 100644
--- a/src/app/api/openai/createAzureOpenai.ts
+++ b/src/app/api/openai/createBizOpenAI/createAzureOpenai.ts
@@ -4,7 +4,7 @@ import urlJoin from 'url-join';
 import { getServerConfig } from '@/config/server';
 import { ChatErrorType } from '@/types/fetch';
 
-// 创建 Azure OpenAI 实例
+// create Azure OpenAI Instance
 export const createAzureOpenai = (params: {
   apiVersion?: string | null;
   endpoint?: string | null;
diff --git a/src/app/api/openai/createOpenai.ts b/src/app/api/openai/createBizOpenAI/createOpenai.ts
similarity index 95%
rename from src/app/api/openai/createOpenai.ts
rename to src/app/api/openai/createBizOpenAI/createOpenai.ts
index 19bff84c4d80..95a6422592c8 100644
--- a/src/app/api/openai/createOpenai.ts
+++ b/src/app/api/openai/createBizOpenAI/createOpenai.ts
@@ -3,7 +3,7 @@ import OpenAI from 'openai';
 import { getServerConfig } from '@/config/server';
 import { ChatErrorType } from '@/types/fetch';
 
-// 创建 OpenAI 实例
+// create OpenAI instance
 export const createOpenai = (userApiKey: string | null, endpoint?: string | null) => {
   const { OPENAI_API_KEY, OPENAI_PROXY_URL } = getServerConfig();
 
diff --git a/src/app/api/openai/createBizOpenAI/index.ts b/src/app/api/openai/createBizOpenAI/index.ts
new file mode 100644
index 000000000000..dbf515e01c6a
--- /dev/null
+++ b/src/app/api/openai/createBizOpenAI/index.ts
@@ -0,0 +1,46 @@
+import OpenAI from 'openai/index';
+
+import { checkAuth } from '@/app/api/auth';
+import { getServerConfig } from '@/config/server';
+import { getOpenAIAuthFromRequest } from '@/const/fetch';
+import { ChatErrorType, ErrorType } from '@/types/fetch';
+
+import { createErrorResponse } from '../errorResponse';
+import { createAzureOpenai } from './createAzureOpenai';
+import { createOpenai } from './createOpenai';
+
+/**
+ * Create an OpenAI instance with auth checking and Azure OpenAI support.
+ * If auth does not pass, an error Response is returned instead of a client.
+ */
+export const createBizOpenAI = (req: Request, model: string): Response | OpenAI => {
+  const { apiKey, accessCode, endpoint, useAzure, apiVersion } = getOpenAIAuthFromRequest(req);
+
+  const result = checkAuth({ accessCode, apiKey });
+
+  if (!result.auth) {
+    return createErrorResponse(result.error as ErrorType);
+  }
+
+  let openai: OpenAI;
+
+  const { USE_AZURE_OPENAI } = getServerConfig();
+  const useAzureOpenAI = useAzure || USE_AZURE_OPENAI;
+
+  try {
+    if (useAzureOpenAI) {
+      openai = createAzureOpenai({ apiVersion, endpoint, model, userApiKey: apiKey });
+    } else {
+      openai = createOpenai(apiKey, endpoint);
+    }
+  } catch (error) {
+    if ((error as Error).cause === ChatErrorType.NoAPIKey) {
+      return createErrorResponse(ChatErrorType.NoAPIKey);
+    }
+
+    console.error(error); // log error to trace it
+    return createErrorResponse(ChatErrorType.InternalServerError);
+  }
+
+  return openai;
+};
diff --git a/src/app/api/openai/models/route.ts b/src/app/api/openai/models/route.ts
index bfe6eeada0c2..c9827d0e5241 100644
--- a/src/app/api/openai/models/route.ts
+++ b/src/app/api/openai/models/route.ts
@@ -1,6 +1,6 @@
 import { getOpenAIAuthFromRequest } from '@/const/fetch';
 
-import { createOpenai } from '../createOpenai';
+import { createOpenai } from '../createBizOpenAI/createOpenai';
 
 export const runtime = 'edge';
 
diff --git a/src/app/api/openai/stt/route.ts b/src/app/api/openai/stt/route.ts
new file mode 100644
index 000000000000..13e163d57eef
--- /dev/null
+++ b/src/app/api/openai/stt/route.ts
@@ -0,0 +1,29 @@
+import { OpenAISTTPayload } from '@lobehub/tts';
+import { createOpenaiAudioTranscriptions } from '@lobehub/tts/server';
+
+import { createBizOpenAI } from '../createBizOpenAI';
+
+export const runtime = 'edge';
+
+export const POST = async (req: Request) => {
+  const formData = await req.formData();
+  const speechBlob = formData.get('speech') as Blob;
+  const optionsString = formData.get('options') as string;
+  const payload = {
+    options: JSON.parse(optionsString),
+    speech: speechBlob,
+  } as OpenAISTTPayload;
+
+  const openaiOrErrResponse = createBizOpenAI(req, payload.options.model);
+
+  // if openaiOrErrResponse is a Response, it means there is an error, just return it
+  if (openaiOrErrResponse instanceof Response) return openaiOrErrResponse;
+
+  const res = await createOpenaiAudioTranscriptions({ openai: openaiOrErrResponse, payload });
+
+  return new Response(JSON.stringify(res), {
+    headers: {
+      'content-type': 'application/json;charset=UTF-8',
+    },
+  });
+};
diff --git a/src/app/api/openai/tts/route.ts b/src/app/api/openai/tts/route.ts
new file mode 100644
index 000000000000..f878d75dd988
--- /dev/null
+++ b/src/app/api/openai/tts/route.ts
@@ -0,0 +1,17 @@
+import { OpenAITTSPayload } from '@lobehub/tts';
+import { createOpenaiAudioSpeech } from '@lobehub/tts/server';
+
+import { createBizOpenAI } from '../createBizOpenAI';
+
+export const runtime = 'edge';
+
+export const POST = async (req: Request) => {
+  const payload = (await req.json()) as OpenAITTSPayload;
+
+  const openaiOrErrResponse = createBizOpenAI(req, payload.options.model);
+
+  // if openaiOrErrResponse is a Response, it means there is an error, just return it
+  if (openaiOrErrResponse instanceof Response) return openaiOrErrResponse;
+
+  return await createOpenaiAudioSpeech({ openai: openaiOrErrResponse, payload });
+};
diff --git a/src/app/api/tts/edge-speech/route.ts b/src/app/api/tts/edge-speech/route.ts
new file mode 100644
index 000000000000..c8df174ba382
--- /dev/null
+++ b/src/app/api/tts/edge-speech/route.ts
@@ -0,0 +1,9 @@
+import { EdgeSpeechPayload, EdgeSpeechTTS } from '@lobehub/tts';
+
+export const runtime = 'edge';
+
+export const POST = async (req: Request) => {
+  const payload = (await req.json()) as EdgeSpeechPayload;
+
+  return await EdgeSpeechTTS.createRequest({ payload });
+};
diff --git a/src/app/api/tts/microsoft-speech/route.ts b/src/app/api/tts/microsoft-speech/route.ts
new file mode 100644
index 000000000000..5e37c5d32b50
--- /dev/null
+++ b/src/app/api/tts/microsoft-speech/route.ts
@@ -0,0 +1,9 @@
+import { MicrosoftSpeechPayload, MicrosoftSpeechTTS } from '@lobehub/tts';
+
+export const runtime = 'edge';
+
+export const POST = async (req: Request) => {
+  const payload = (await req.json()) as MicrosoftSpeechPayload;
+
+  return await MicrosoftSpeechTTS.createRequest({ payload });
+};
diff --git a/src/app/chat/(desktop)/features/ChatHeader.tsx b/src/app/chat/(desktop)/features/ChatHeader.tsx
index 34c90b412a0b..3a3337c5347c 100644
--- a/src/app/chat/(desktop)/features/ChatHeader.tsx
+++ b/src/app/chat/(desktop)/features/ChatHeader.tsx
@@ -7,6 +7,7 @@ import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { Flexbox } from 'react-layout-kit';
 
+import { DESKTOP_HEADER_ICON_SIZE } from '@/const/layoutTokens';
 import { useGlobalStore } from '@/store/global';
 import { useSessionChatInit, useSessionStore } from '@/store/session';
 import { agentSelectors, sessionSelectors } from '@/store/session/selectors';
@@ -85,7 +86,7 @@ const Header = memo(() => {
           <ActionIcon
             icon={showAgentSettings ? PanelRightClose : PanelRightOpen}
             onClick={() => toggleConfig()}
-            size={{ fontSize: 24 }}
+            size={DESKTOP_HEADER_ICON_SIZE}
             title={t('roleAndArchive')}
           />
           {!isInbox && (
@@ -94,7 +95,7 @@ const Header = memo(() => {
               onClick={() => {
                 router.push(pathString('/chat/settings', { hash: location.hash }));
               }}
-              size={{ fontSize: 24 }}
+              size={DESKTOP_HEADER_ICON_SIZE}
               title={t('header.session', { ns: 'setting' })}
             />
           )}
diff --git a/src/app/chat/(desktop)/features/ChatInput/Footer.tsx b/src/app/chat/(desktop)/features/ChatInput/Footer.tsx
new file mode 100644
index 000000000000..b291a0583c44
--- /dev/null
+++ b/src/app/chat/(desktop)/features/ChatInput/Footer.tsx
@@ -0,0 +1,62 @@
+import { Icon } from '@lobehub/ui';
+import { Button } from 'antd';
+import { createStyles } from 'antd-style';
+import { ArrowBigUp, CornerDownLeft, Loader2 } from 'lucide-react';
+import { memo } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import SaveTopic from '@/app/chat/features/ChatInput/Topic';
+import { useSendMessage } from '@/app/chat/features/ChatInput/useSend';
+import { useSessionStore } from '@/store/session';
+
+const useStyles = createStyles(({ css }) => ({
+  footerBar: css`
+    display: flex;
+    flex: none;
+    gap: 8px;
+    align-items: center;
+    justify-content: flex-end;
+
+    padding: 0 24px;
+  `,
+}));
+
+const Footer = memo(() => {
+  const { t } = useTranslation('chat');
+  const { styles, theme } = useStyles();
+  const [loading, onStop] = useSessionStore((s) => [!!s.chatLoadingId, s.stopGenerateMessage]);
+
+  const onSend = useSendMessage();
+
+  return (
+    <div className={styles.footerBar}>
+      <Flexbox
+        gap={4}
+        horizontal
+        style={{ color: theme.colorTextDescription, fontSize: 12, marginRight: 12 }}
+      >
+        <Icon icon={CornerDownLeft} />
+        <span>{t('send')}</span>
+        <span>/</span>
+        <Flexbox horizontal>
+          <Icon icon={ArrowBigUp} />
+          <Icon icon={CornerDownLeft} />
+        </Flexbox>
+        <span>{t('warp')}</span>
+      </Flexbox>
+      <SaveTopic />
+      {loading ? (
+        <Button icon={loading && <Icon icon={Loader2} spin />} onClick={onStop}>
+          {t('stop')}
+        </Button>
+      ) : (
+        <Button onClick={() => onSend()} type={'primary'}>
+          {t('send')}
+        </Button>
+      )}
+    </div>
+  );
+});
+
+export default Footer;
diff --git a/src/app/chat/(desktop)/features/ChatInput/index.tsx b/src/app/chat/(desktop)/features/ChatInput/index.tsx
index 2b923c74c69c..55bac43bbb4e 100644
--- a/src/app/chat/(desktop)/features/ChatInput/index.tsx
+++ b/src/app/chat/(desktop)/features/ChatInput/index.tsx
@@ -47,7 +47,6 @@ const ChatInputDesktopLayout = memo(() => {
         minHeight={CHAT_TEXTAREA_HEIGHT}
         onSizeChange={(_, size) => {
           if (!size) return;
-
           updatePreference({
             inputHeight:
               typeof size.height === 'string' ? Number.parseInt(size.height) : size.height,
diff --git a/src/app/chat/(desktop)/features/SessionHeader.tsx b/src/app/chat/(desktop)/features/SessionHeader.tsx
index 9789f00582ca..2c40470f6eb4 100644
--- a/src/app/chat/(desktop)/features/SessionHeader.tsx
+++ b/src/app/chat/(desktop)/features/SessionHeader.tsx
@@ -5,6 +5,7 @@ import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
 import { Flexbox } from 'react-layout-kit';
 
+import { DESKTOP_HEADER_ICON_SIZE } from '@/const/layoutTokens';
 import { useSessionStore } from '@/store/session';
 
 import SessionSearchBar from '../../features/SessionSearchBar';
@@ -28,11 +29,10 @@ const Header = memo(() => {
     <Flexbox className={styles.top} gap={16} padding={16}>
       <Flexbox distribution={'space-between'} horizontal>
         <Logo className={styles.logo} size={36} type={'text'} />
-
         <ActionIcon
           icon={MessageSquarePlus}
           onClick={() => createSession()}
-          size={{ fontSize: 24 }}
+          size={DESKTOP_HEADER_ICON_SIZE}
           style={{ flex: 'none' }}
           title={t('newAgent')}
         />
diff --git a/src/app/chat/(mobile)/features/ChatHeader.tsx b/src/app/chat/(mobile)/features/ChatHeader.tsx
index a4f6725a8e27..1767a0ba98b9 100644
--- a/src/app/chat/(mobile)/features/ChatHeader.tsx
+++ b/src/app/chat/(mobile)/features/ChatHeader.tsx
@@ -1,7 +1,8 @@
-import { ActionIcon, MobileNavBar, MobileNavBarTitle } from '@lobehub/ui';
-import { Clock3, Settings } from 'lucide-react';
+import { ActionIcon, Icon, MobileNavBar, MobileNavBarTitle } from '@lobehub/ui';
+import { Dropdown, MenuProps } from 'antd';
+import { Clock3, MoreHorizontal, Settings, Share2 } from 'lucide-react';
 import { useRouter } from 'next/navigation';
-import { memo } from 'react';
+import { memo, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 
 import { MOBILE_HEADER_ICON_SIZE } from '@/const/layoutTokens';
@@ -15,6 +16,7 @@ import ShareButton from '../../features/ChatHeader/ShareButton';
 const MobileHeader = memo(() => {
   const { t } = useTranslation('chat');
   const router = useRouter();
+  const [open, setOpen] = useState(false);
 
   const [isInbox, title] = useSessionStore((s) => [
     sessionSelectors.isInboxSession(s),
@@ -25,21 +27,37 @@ const MobileHeader = memo(() => {
 
   const displayTitle = isInbox ? t('inbox.title') : title;
 
+  const items: MenuProps['items'] = [
+    {
+      icon: <Icon icon={Share2} />,
+      key: 'share',
+      label: t('share', { ns: 'common' }),
+      onClick: () => setOpen(true),
+    },
+    !isInbox && {
+      icon: <Icon icon={Settings} />,
+      key: 'settings',
+      label: t('header.session', { ns: 'setting' }),
+      onClick: () => router.push(pathString('/chat/settings', { hash: location.hash })),
+    },
+  ].filter(Boolean) as MenuProps['items'];
+
   return (
     <MobileNavBar
       center={<MobileNavBarTitle title={displayTitle} />}
       onBackClick={() => router.push('/chat')}
       right={
         <>
-          <ShareButton />
           <ActionIcon icon={Clock3} onClick={() => toggleConfig()} size={MOBILE_HEADER_ICON_SIZE} />
-          {!isInbox && (
-            <ActionIcon
-              icon={Settings}
-              onClick={() => router.push(pathString('/chat/settings', { hash: location.hash }))}
-              size={MOBILE_HEADER_ICON_SIZE}
-            />
-          )}
+          <ShareButton mobile open={open} setOpen={setOpen} />
+          <Dropdown
+            menu={{
+              items,
+            }}
+            trigger={['click']}
+          >
+            <ActionIcon icon={MoreHorizontal} />
+          </Dropdown>
         </>
       }
       showBackButton
diff --git a/src/app/chat/(mobile)/features/ChatInput/Mobile.tsx b/src/app/chat/(mobile)/features/ChatInput/Mobile.tsx
index c49352d492f6..69a96286940a 100644
--- a/src/app/chat/(mobile)/features/ChatInput/Mobile.tsx
+++ b/src/app/chat/(mobile)/features/ChatInput/Mobile.tsx
@@ -1,20 +1,36 @@
+import { createStyles } from 'antd-style';
+import { rgba } from 'polished';
 import { memo } from 'react';
 import { Flexbox } from 'react-layout-kit';
 
 import ActionBar from '@/app/chat/features/ChatInput/ActionBar';
 import InputAreaInner from '@/app/chat/features/ChatInput/InputAreaInner';
+import STT from '@/app/chat/features/ChatInput/STT';
 import SaveTopic from '@/app/chat/features/ChatInput/Topic';
 
 import SendButton from './SendButton';
-import { useStyles } from './style.mobile';
+
+const useStyles = createStyles(({ css, token }) => {
+  return {
+    container: css`
+      padding: 12px 0;
+      background: ${token.colorBgLayout};
+      border-top: 1px solid ${rgba(token.colorBorder, 0.25)};
+    `,
+    inner: css`
+      padding: 0 8px;
+    `,
+  };
+});
 
 const ChatInputArea = memo(() => {
-  const { cx, styles } = useStyles();
+  const { styles } = useStyles();
 
   return (
-    <Flexbox className={cx(styles.container)} gap={12}>
-      <ActionBar rightAreaStartRender={<SaveTopic />} />
+    <Flexbox className={styles.container} gap={12}>
+      <ActionBar mobile padding={'0 8px'} rightAreaStartRender={<SaveTopic mobile />} />
       <Flexbox className={styles.inner} gap={8} horizontal>
+        <STT mobile />
         <InputAreaInner mobile />
         <SendButton />
       </Flexbox>
diff --git a/src/app/chat/(mobile)/features/ChatInput/style.mobile.ts b/src/app/chat/(mobile)/features/ChatInput/style.mobile.ts
deleted file mode 100644
index cfe44042d0cf..000000000000
--- a/src/app/chat/(mobile)/features/ChatInput/style.mobile.ts
+++ /dev/null
@@ -1,19 +0,0 @@
-import { createStyles } from 'antd-style';
-import { rgba } from 'polished';
-
-export const useStyles = createStyles(({ css, token }) => {
-  return {
-    container: css`
-      padding: 12px 0;
-      background: ${token.colorBgLayout};
-      border-top: 1px solid ${rgba(token.colorBorder, 0.25)};
-    `,
-    inner: css`
-      padding: 0 16px;
-    `,
-    input: css`
-      background: ${token.colorFillSecondary} !important;
-      border: none !important;
-    `,
-  };
-});
diff --git a/src/app/chat/features/ChatHeader/ShareButton/index.tsx b/src/app/chat/features/ChatHeader/ShareButton/index.tsx
index f45f261ee7c9..3eaa9f8c677c 100644
--- a/src/app/chat/features/ChatHeader/ShareButton/index.tsx
+++ b/src/app/chat/features/ChatHeader/ShareButton/index.tsx
@@ -1,32 +1,40 @@
 import { ActionIcon, Modal } from '@lobehub/ui';
-import { useResponsive } from 'antd-style';
 import { Share2 } from 'lucide-react';
 import dynamic from 'next/dynamic';
-import { memo, useState } from 'react';
+import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
+import useMergeState from 'use-merge-value';
 
-import { MOBILE_HEADER_ICON_SIZE } from '@/const/layoutTokens';
+import { DESKTOP_HEADER_ICON_SIZE } from '@/const/layoutTokens';
 import { useSessionStore } from '@/store/session';
 
 const Inner = dynamic(() => import('./Inner'));
+interface ShareButtonProps {
+  mobile?: boolean;
+  open?: boolean;
+  setOpen?: (open: boolean) => void;
+}
 
-const ShareButton = memo(() => {
-  const [isModalOpen, setIsModalOpen] = useState(false);
+const ShareButton = memo<ShareButtonProps>(({ mobile, setOpen, open }) => {
+  const [isModalOpen, setIsModalOpen] = useMergeState(false, {
+    defaultValue: false,
+    onChange: setOpen,
+    value: open,
+  });
   const { t } = useTranslation('common');
   const [shareLoading] = useSessionStore((s) => [s.shareLoading]);
-  const { mobile } = useResponsive();
-
-  const size = mobile ? MOBILE_HEADER_ICON_SIZE : { fontSize: 24 };
 
   return (
     <>
-      <ActionIcon
-        icon={Share2}
-        loading={shareLoading}
-        onClick={() => setIsModalOpen(true)}
-        size={size}
-        title={t('share')}
-      />
+      {!mobile && (
+        <ActionIcon
+          icon={Share2}
+          loading={shareLoading}
+          onClick={() => setIsModalOpen(true)}
+          size={DESKTOP_HEADER_ICON_SIZE}
+          title={t('share')}
+        />
+      )}
       <Modal
         centered={false}
         footer={null}
diff --git a/src/app/chat/features/ChatInput/ActionBar/config.ts b/src/app/chat/features/ChatInput/ActionBar/config.ts
index f170285629ba..45e86a5048b5 100644
--- a/src/app/chat/features/ChatInput/ActionBar/config.ts
+++ b/src/app/chat/features/ChatInput/ActionBar/config.ts
@@ -1,5 +1,4 @@
-import { FC } from 'react';
-
+import STT from '../STT';
 import Clear from './Clear';
 import FileUpload from './FileUpload';
 import History from './History';
@@ -7,15 +6,26 @@ import ModelSwitch from './ModelSwitch';
 import Temperature from './Temperature';
 import Token from './Token';
 
-export const actionMap: Record<string, FC> = {
+export const actionMap = {
   clear: Clear,
   fileUpload: FileUpload,
   history: History,
   model: ModelSwitch,
+  stt: STT,
   temperature: Temperature,
   token: Token,
-};
+} as const;
+
+type ActionMap = typeof actionMap;
+
+export type ActionKeys = keyof ActionMap;
+
+type getActionList = (mobile?: boolean) => ActionKeys[];
 
 // we can make these action lists configurable in the future
-export const leftActionList = ['model', 'fileUpload', 'temperature', 'history', 'token'];
-export const rightActionList = ['clear'];
+export const getLeftActionList: getActionList = (mobile) =>
+  ['model', 'fileUpload', 'temperature', 'history', !mobile && 'stt', 'token'].filter(
+    Boolean,
+  ) as ActionKeys[];
+
+export const getRightActionList: getActionList = () => ['clear'].filter(Boolean) as ActionKeys[];
diff --git a/src/app/chat/features/ChatInput/ActionBar/index.tsx b/src/app/chat/features/ChatInput/ActionBar/index.tsx
index 7a72dc23ea0d..e257261ae72f 100644
--- a/src/app/chat/features/ChatInput/ActionBar/index.tsx
+++ b/src/app/chat/features/ChatInput/ActionBar/index.tsx
@@ -1,9 +1,9 @@
-import { ReactNode, memo } from 'react';
+import { ReactNode, memo, useMemo } from 'react';
 import { Flexbox } from 'react-layout-kit';
 
-import { actionMap, leftActionList, rightActionList } from './config';
+import { ActionKeys, actionMap, getLeftActionList, getRightActionList } from './config';
 
-const RenderActionList = ({ dataSource }: { dataSource: string[] }) => (
+const RenderActionList = ({ dataSource }: { dataSource: ActionKeys[] }) => (
   <>
     {dataSource.map((key) => {
       const Render = actionMap[key];
@@ -13,23 +13,47 @@ const RenderActionList = ({ dataSource }: { dataSource: string[] }) => (
 );
 
 export interface ActionBarProps {
+  leftAreaEndRender?: ReactNode;
+  leftAreaStartRender?: ReactNode;
+  mobile?: boolean;
+  padding?: number | string;
   rightAreaEndRender?: ReactNode;
   rightAreaStartRender?: ReactNode;
 }
 
-const ActionBar = memo<ActionBarProps>(({ rightAreaStartRender, rightAreaEndRender }) => {
-  return (
-    <Flexbox align={'center'} flex={'none'} horizontal justify={'space-between'} padding={'0 16px'}>
-      <Flexbox align={'center'} flex={1} gap={4} horizontal>
-        <RenderActionList dataSource={leftActionList} />
-      </Flexbox>
-      <Flexbox align={'center'} flex={0} gap={4} horizontal justify={'flex-end'}>
-        {rightAreaStartRender}
-        <RenderActionList dataSource={rightActionList} />
-        {rightAreaEndRender}
+const ActionBar = memo<ActionBarProps>(
+  ({
+    padding = '0 16px',
+    mobile,
+    rightAreaStartRender,
+    rightAreaEndRender,
+    leftAreaStartRender,
+    leftAreaEndRender,
+  }) => {
+    const leftActionList = useMemo(() => getLeftActionList(mobile), [mobile]);
+    const rightActionList = useMemo(() => getRightActionList(mobile), [mobile]);
+
+    return (
+      <Flexbox
+        align={'center'}
+        flex={'none'}
+        horizontal
+        justify={'space-between'}
+        padding={padding}
+      >
+        <Flexbox align={'center'} flex={1} gap={4} horizontal>
+          {leftAreaStartRender}
+          <RenderActionList dataSource={leftActionList} />
+          {leftAreaEndRender}
+        </Flexbox>
+        <Flexbox align={'center'} flex={0} gap={4} horizontal justify={'flex-end'}>
+          {rightAreaStartRender}
+          <RenderActionList dataSource={rightActionList} />
+          {rightAreaEndRender}
+        </Flexbox>
       </Flexbox>
-    </Flexbox>
-  );
-});
+    );
+  },
+);
 
 export default ActionBar;
diff --git a/src/app/chat/features/ChatInput/STT/index.tsx b/src/app/chat/features/ChatInput/STT/index.tsx
new file mode 100644
index 000000000000..981bdba6ab9f
--- /dev/null
+++ b/src/app/chat/features/ChatInput/STT/index.tsx
@@ -0,0 +1,146 @@
+import { ActionIcon, Alert, Highlighter, Icon } from '@lobehub/ui';
+import { Button, Dropdown } from 'antd';
+import { createStyles } from 'antd-style';
+import { Mic, MicOff } from 'lucide-react';
+import { memo, useCallback, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { useSTT } from '@/hooks/useSTT';
+import { useSessionStore } from '@/store/session';
+import { ChatMessageError } from '@/types/chatMessage';
+import { getMessageError } from '@/utils/fetch';
+
+const useStyles = createStyles(({ css, token }) => ({
+  recording: css`
+    width: 8px;
+    height: 8px;
+    background: ${token.colorError};
+    border-radius: 50%;
+  `,
+}));
+
+const STT = memo<{ mobile?: boolean }>(({ mobile }) => {
+  const [error, setError] = useState<ChatMessageError>();
+  const { t } = useTranslation('chat');
+  const { styles } = useStyles();
+
+  const [loading, updateInputMessage] = useSessionStore((s) => [
+    !!s.chatLoadingId,
+    s.updateInputMessage,
+  ]);
+
+  const setDefaultError = useCallback(
+    (err?: any) => {
+      setError({ body: err, message: t('stt.responseError', { ns: 'error' }), type: 500 });
+    },
+    [t],
+  );
+
+  const { start, isLoading, stop, formattedTime, time, response, isRecording } = useSTT({
+    onError: (err) => {
+      stop();
+      setDefaultError(err);
+    },
+    onErrorRetry: (err) => {
+      stop();
+      setDefaultError(err);
+    },
+    onSuccess: async () => {
+      if (!response) return;
+      if (response.status === 200) return;
+      const message = await getMessageError(response);
+      if (message) {
+        setError(message);
+      } else {
+        setDefaultError();
+      }
+      stop();
+    },
+    onTextChange: (text) => {
+      if (loading) stop();
+      if (text) updateInputMessage(text);
+    },
+  });
+
+  const icon = isLoading ? MicOff : Mic;
+  const Render: any = !mobile ? ActionIcon : Button;
+  const iconRender: any = !mobile ? icon : <Icon icon={icon} />;
+  const desc = t('stt.action');
+
+  const handleTriggerStartStop = useCallback(() => {
+    if (loading) return;
+    if (!isLoading) {
+      start();
+    } else {
+      stop();
+    }
+  }, [loading, isLoading, start, stop]);
+
+  const handleCloseError = useCallback(() => {
+    setError(undefined);
+    stop();
+  }, [stop]);
+
+  const handleRetry = useCallback(() => {
+    setError(undefined);
+    start();
+  }, [start]);
+
+  return (
+    <Dropdown
+      dropdownRender={
+        error
+          ? () => (
+              <Alert
+                action={
+                  <Button onClick={handleRetry} size={'small'} type={'primary'}>
+                    {t('retry', { ns: 'common' })}
+                  </Button>
+                }
+                closable
+                extra={
+                  error.body && (
+                    <Highlighter copyButtonSize={'small'} language={'json'} type={'pure'}>
+                      {JSON.stringify(error.body, null, 2)}
+                    </Highlighter>
+                  )
+                }
+                message={error.message}
+                onClose={handleCloseError}
+                style={{ alignItems: 'center' }}
+                type="error"
+              />
+            )
+          : undefined
+      }
+      menu={{
+        activeKey: 'time',
+        items: [
+          {
+            key: 'time',
+            label: (
+              <Flexbox align={'center'} gap={8} horizontal>
+                <div className={styles.recording} />
+                {time > 0 ? formattedTime : t(isRecording ? 'stt.loading' : 'stt.prettifying')}
+              </Flexbox>
+            ),
+          },
+        ],
+      }}
+      open={!!error || isRecording || isLoading}
+      placement={mobile ? 'topRight' : 'top'}
+      trigger={['click']}
+    >
+      <Render
+        icon={iconRender}
+        onClick={handleTriggerStartStop}
+        placement={'bottom'}
+        style={{ flex: 'none' }}
+        title={desc}
+      />
+    </Dropdown>
+  );
+});
+
+export default STT;
diff --git a/src/app/chat/features/ChatInput/Topic/index.tsx b/src/app/chat/features/ChatInput/Topic/index.tsx
index b30d7b2f2a41..b6d2df25bda9 100644
--- a/src/app/chat/features/ChatInput/Topic/index.tsx
+++ b/src/app/chat/features/ChatInput/Topic/index.tsx
@@ -1,6 +1,5 @@
 import { ActionIcon, Icon, Tooltip } from '@lobehub/ui';
 import { Button } from 'antd';
-import { useResponsive } from 'antd-style';
 import { LucideGalleryVerticalEnd, LucideMessageSquarePlus } from 'lucide-react';
 import { memo } from 'react';
 import { useHotkeys } from 'react-hotkeys-hook';
@@ -10,13 +9,12 @@ import HotKeys from '@/components/HotKeys';
 import { PREFIX_KEY, SAVE_TOPIC_KEY } from '@/const/hotkeys';
 import { useSessionStore } from '@/store/session';
 
-const SaveTopic = memo(() => {
+const SaveTopic = memo<{ mobile?: boolean }>(({ mobile }) => {
   const { t } = useTranslation('chat');
   const [hasTopic, openNewTopicOrSaveTopic] = useSessionStore((s) => [
     !!s.activeTopicId,
     s.openNewTopicOrSaveTopic,
   ]);
-  const { mobile } = useResponsive();
 
   const icon = hasTopic ? LucideMessageSquarePlus : LucideGalleryVerticalEnd;
   const Render = mobile ? ActionIcon : Button;
diff --git a/src/app/chat/features/Conversation/ChatList/Actions/Assistant.tsx b/src/app/chat/features/Conversation/ChatList/Actions/Assistant.tsx
index 01367c0fdb15..ac74ce5bda56 100644
--- a/src/app/chat/features/Conversation/ChatList/Actions/Assistant.tsx
+++ b/src/app/chat/features/Conversation/ChatList/Actions/Assistant.tsx
@@ -6,14 +6,14 @@ import { useCustomActions } from './customAction';
 
 export const AssistantActionsBar: RenderAction = memo(({ text, id, onActionClick, error }) => {
   const { regenerate, edit, copy, divider, del } = useChatListActionsBar(text);
-  const { translate } = useCustomActions();
+  const { translate, tts } = useCustomActions();
   if (id === 'default') return;
 
   if (error) return <ErrorActionsBar onActionClick={onActionClick} text={text} />;
 
   return (
     <ActionIconGroup
-      dropdownMenu={[edit, copy, regenerate, divider, translate, divider, del]}
+      dropdownMenu={[edit, copy, regenerate, divider, tts, translate, divider, del]}
       items={[regenerate, copy]}
       onActionClick={onActionClick}
       type="ghost"
diff --git a/src/app/chat/features/Conversation/ChatList/Actions/User.tsx b/src/app/chat/features/Conversation/ChatList/Actions/User.tsx
index 90555a6eddac..277fefedd236 100644
--- a/src/app/chat/features/Conversation/ChatList/Actions/User.tsx
+++ b/src/app/chat/features/Conversation/ChatList/Actions/User.tsx
@@ -5,11 +5,11 @@ import { useCustomActions } from './customAction';
 
 export const UserActionsBar: RenderAction = memo(({ text, onActionClick }) => {
   const { regenerate, edit, copy, divider, del } = useChatListActionsBar(text);
-  const { translate } = useCustomActions();
+  const { translate, tts } = useCustomActions();
 
   return (
     <ActionIconGroup
-      dropdownMenu={[edit, copy, regenerate, divider, translate, divider, del]}
+      dropdownMenu={[edit, copy, regenerate, divider, tts, translate, divider, del]}
       items={[regenerate, edit]}
       onActionClick={onActionClick}
       type="ghost"
diff --git a/src/app/chat/features/Conversation/ChatList/Actions/customAction.ts b/src/app/chat/features/Conversation/ChatList/Actions/customAction.ts
index 00b9d7a7b928..ef900f2ad245 100644
--- a/src/app/chat/features/Conversation/ChatList/Actions/customAction.ts
+++ b/src/app/chat/features/Conversation/ChatList/Actions/customAction.ts
@@ -1,5 +1,5 @@
 import { ActionIconGroupItems } from '@lobehub/ui/es/ActionIconGroup';
-import { LanguagesIcon } from 'lucide-react';
+import { LanguagesIcon, Play } from 'lucide-react';
 import { useTranslation } from 'react-i18next';
 
 import { localeOptions } from '@/locales/options';
@@ -14,10 +14,17 @@ export const useCustomActions = () => {
     })),
     icon: LanguagesIcon,
     key: 'translate',
-    label: t('translateTo'),
+    label: t('translate.action'),
+  } as ActionIconGroupItems;
+
+  const tts = {
+    icon: Play,
+    key: 'tts',
+    label: t('tts.action'),
   } as ActionIconGroupItems;
 
   return {
     translate,
+    tts,
   };
 };
diff --git a/src/app/chat/features/Conversation/ChatList/Actions/index.ts b/src/app/chat/features/Conversation/ChatList/Actions/index.ts
index 4c0abb106289..cc07d7229f73 100644
--- a/src/app/chat/features/Conversation/ChatList/Actions/index.ts
+++ b/src/app/chat/features/Conversation/ChatList/Actions/index.ts
@@ -20,10 +20,11 @@ interface ActionsClick {
 }
 
 export const useActionsClick = (): ChatListProps['onActionsClick'] => {
-  const [deleteMessage, resendMessage, translateMessage] = useSessionStore((s) => [
+  const [deleteMessage, resendMessage, translateMessage, ttsMessage] = useSessionStore((s) => [
     s.deleteMessage,
     s.resendMessage,
     s.translateMessage,
+    s.ttsMessage,
   ]);
 
   return (action, { id, error }) => {
@@ -42,6 +43,12 @@ export const useActionsClick = (): ChatListProps['onActionsClick'] => {
         },
         trigger: action.key === 'regenerate',
       },
+      {
+        onClick: () => {
+          ttsMessage(id);
+        },
+        trigger: action.key === 'tts',
+      },
       {
         onClick: () => {
           /**
diff --git a/src/app/chat/features/Conversation/ChatList/Extras/Assistant.tsx b/src/app/chat/features/Conversation/ChatList/Extras/Assistant.tsx
index c416dfedfe61..e6270ad3c71c 100644
--- a/src/app/chat/features/Conversation/ChatList/Extras/Assistant.tsx
+++ b/src/app/chat/features/Conversation/ChatList/Extras/Assistant.tsx
@@ -1,39 +1,45 @@
 import { SiOpenai } from '@icons-pack/react-simple-icons';
 import { RenderMessageExtra, Tag } from '@lobehub/ui';
-import { Divider } from 'antd';
 import { memo } from 'react';
 import { Flexbox } from 'react-layout-kit';
 
 import { useSessionStore } from '@/store/session';
 import { agentSelectors } from '@/store/session/selectors';
+import { ChatMessage } from '@/types/chatMessage';
 
+import ExtraContainer from './ExtraContainer';
+import TTS from './TTS';
 import Translate from './Translate';
 
-export const AssistantMessageExtra: RenderMessageExtra = memo(({ extra, id }) => {
-  const model = useSessionStore(agentSelectors.currentAgentModel);
-
-  const showModelTag = extra?.fromModel && model !== extra?.fromModel;
-  const hasTranslate = !!extra?.translate;
-
-  const showExtra = showModelTag || hasTranslate;
-
-  const loading = useSessionStore((s) => s.chatLoadingId === id);
-
-  if (!showExtra) return;
-
-  return (
-    <Flexbox gap={8} style={{ marginTop: 8 }}>
-      {showModelTag && (
-        <div>
-          <Tag icon={<SiOpenai size={'1em'} />}>{extra?.fromModel as string}</Tag>
-        </div>
-      )}
-      {extra.translate && (
+export const AssistantMessageExtra: RenderMessageExtra = memo<ChatMessage>(
+  ({ extra, id, content }) => {
+    const model = useSessionStore(agentSelectors.currentAgentModel);
+    const loading = useSessionStore((s) => s.chatLoadingId === id);
+
+    const showModelTag = extra?.fromModel && model !== extra?.fromModel;
+    // Gate on the locally computed `showModelTag`, not on a `showModelTag` property of `extra`.
+    if (!(showModelTag || extra?.translate || extra?.tts)) return;
+
+    return (
+      <Flexbox gap={8} style={{ marginTop: 8 }}>
+        {showModelTag && (
+          <div>
+            <Tag icon={<SiOpenai size={'1em'} />}>{extra?.fromModel as string}</Tag>
+          </div>
+        )}
         <div>
-          <Divider style={{ margin: '12px 0' }} />
-          <Translate id={id} loading={loading} {...extra.translate} />
+          {extra?.tts && (
+            <ExtraContainer>
+              <TTS content={content} id={id} loading={loading} {...extra?.tts} />
+            </ExtraContainer>
+          )}
+          {extra?.translate && (
+            <ExtraContainer>
+              <Translate id={id} loading={loading} {...extra?.translate} />
+            </ExtraContainer>
+          )}
         </div>
-      )}
-    </Flexbox>
-  );
-});
+      </Flexbox>
+    );
+  },
+);
diff --git a/src/app/chat/features/Conversation/ChatList/Extras/AudioPlayer.tsx b/src/app/chat/features/Conversation/ChatList/Extras/AudioPlayer.tsx
new file mode 100644
index 000000000000..9baf5220b390
--- /dev/null
+++ b/src/app/chat/features/Conversation/ChatList/Extras/AudioPlayer.tsx
@@ -0,0 +1,139 @@
+import { ActionIcon, ActionIconProps, Icon, Tag } from '@lobehub/ui';
+import { Dropdown, Slider } from 'antd';
+import { Download, PauseCircle, Play, StopCircle } from 'lucide-react';
+import React, { memo, useCallback, useMemo } from 'react';
+import { Flexbox } from 'react-layout-kit';
+
+const secondsToMinutesAndSeconds = (num: number) => Math.floor(num);
+export interface AudioProps {
+  currentTime: number;
+  download: () => void;
+  duration: number;
+  isPlaying: boolean;
+  pause: () => void;
+  play: () => void;
+  setTime: (time: number) => void;
+  stop: () => void;
+}
+
+export interface AudioPlayerProps {
+  allowPause?: boolean;
+  audio: AudioProps;
+  buttonSize?: ActionIconProps['size'];
+  className?: string;
+  isLoading?: boolean;
+  onInitPlay?: () => void;
+  onPause?: () => void;
+  onPlay?: () => void;
+  onStop?: () => void;
+  showSlider?: boolean;
+  style?: React.CSSProperties;
+  timeRender?: 'tag' | 'text';
+  timeStyle?: React.CSSProperties;
+  timeType?: 'left' | 'current' | 'combine';
+}
+
+const AudioPlayer = memo<AudioPlayerProps>(
+  ({
+    isLoading,
+    style,
+    timeStyle,
+    buttonSize,
+    className,
+    audio,
+    allowPause = true,
+    timeType = 'left',
+    showSlider = true,
+    timeRender = 'text',
+    onInitPlay,
+    onPause,
+    onStop,
+    onPlay,
+  }) => {
+    const { isPlaying, play, stop, pause, duration, setTime, currentTime, download } = audio;
+
+    const formatedLeftTime = secondsToMinutesAndSeconds(duration - currentTime);
+    const formatedCurrentTime = secondsToMinutesAndSeconds(currentTime);
+    const formatedDuration = secondsToMinutesAndSeconds(duration);
+
+    const Time = useMemo(
+      () => (timeRender === 'tag' ? Tag : (props: any) => <div {...props} />),
+      [timeRender],
+    );
+
+    // No audio loaded yet and nothing in flight: ask the owner to fetch it instead of playing.
+    // Each dependency array lists every value its callback reads, so no stale prop is captured.
+    const handlePlay = useCallback(() => {
+      if (!duration && !isLoading) return onInitPlay?.();
+      play?.();
+      onPlay?.();
+    }, [duration, isLoading, onInitPlay, onPlay, play]);
+
+    // Pause and stop both run the audio action first, then notify the owner.
+    const handlePause = useCallback(() => {
+      pause?.();
+      onPause?.();
+    }, [onPause, pause]);
+
+    const handleStop = useCallback(() => {
+      stop?.();
+      onStop?.();
+    }, [onStop, stop]);
+
+    return (
+      <Flexbox
+        align={'center'}
+        className={className}
+        gap={8}
+        horizontal
+        style={{ paddingRight: 8, width: '100%', ...style }}
+      >
+        <ActionIcon
+          icon={isPlaying ? (allowPause ? PauseCircle : StopCircle) : Play}
+          loading={isLoading}
+          onClick={isPlaying ? (allowPause ? handlePause : handleStop) : handlePlay}
+          size={buttonSize || { blockSize: 32, fontSize: 16 }}
+          style={{ flex: 'none' }}
+        />
+        {showSlider && (
+          <Slider
+            disabled={duration === 0}
+            max={duration}
+            min={0}
+            onChange={(e) => setTime(e)}
+            step={0.01}
+            style={{ flex: 1 }}
+            tooltip={{ formatter: secondsToMinutesAndSeconds as any }}
+            value={currentTime}
+          />
+        )}
+        <Dropdown
+          disabled={duration === 0}
+          menu={{
+            items: [
+              {
+                key: 'download',
+                label: <Icon icon={Download} size={{ fontSize: 16 }} />,
+                onClick: download,
+              },
+            ],
+          }}
+          placement="top"
+        >
+          <Time style={{ cursor: 'pointer', flex: 'none', ...timeStyle }}>
+            {timeType === 'left' && formatedLeftTime}
+            {timeType === 'current' && formatedCurrentTime}
+            {timeType === 'combine' && (
+              <span>
+                {formatedCurrentTime}
+                <span style={{ opacity: 0.66 }}>{` / ${formatedDuration}`}</span>
+              </span>
+            )}
+          </Time>
+        </Dropdown>
+      </Flexbox>
+    );
+  },
+);
+
+export default AudioPlayer;
diff --git a/src/app/chat/features/Conversation/ChatList/Extras/ExtraContainer.tsx b/src/app/chat/features/Conversation/ChatList/Extras/ExtraContainer.tsx
new file mode 100644
index 000000000000..1998662d079a
--- /dev/null
+++ b/src/app/chat/features/Conversation/ChatList/Extras/ExtraContainer.tsx
@@ -0,0 +1,13 @@
+import { Divider } from 'antd';
+import { PropsWithChildren, memo } from 'react';
+
+// Wrapper for one extra section under a chat message (TTS player or translation):
+// renders a divider first, then the section content passed as children.
+const ExtraContainer = memo<PropsWithChildren>(({ children }) => (
+  <div>
+    <Divider style={{ margin: '8px 0' }} />
+    {children}
+  </div>
+));
+
+export default ExtraContainer;
diff --git a/src/app/chat/features/Conversation/ChatList/Extras/TTS.tsx b/src/app/chat/features/Conversation/ChatList/Extras/TTS.tsx
new file mode 100644
index 000000000000..98e0b17ddf87
--- /dev/null
+++ b/src/app/chat/features/Conversation/ChatList/Extras/TTS.tsx
@@ -0,0 +1,122 @@
+import { AudioPlayer } from '@lobehub/tts/react';
+import { ActionIcon, Alert, Highlighter } from '@lobehub/ui';
+import { Button } from 'antd';
+import { TrashIcon } from 'lucide-react';
+import { memo, useCallback, useEffect, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { useTTS } from '@/hooks/useTTS';
+import { useSessionStore } from '@/store/session';
+import { ChatMessageError, ChatTTS } from '@/types/chatMessage';
+import { getMessageError } from '@/utils/fetch';
+
+interface TTSProps extends ChatTTS {
+  content: string;
+  id: string;
+  loading?: boolean;
+}
+
+const TTS = memo<TTSProps>(({ id, init, content }) => {
+  const [isStart, setIsStart] = useState(false);
+  const [error, setError] = useState<ChatMessageError>();
+  const { t } = useTranslation('chat');
+
+  const [ttsMessage, clearTTS] = useSessionStore((s) => [s.ttsMessage, s.clearTTS]);
+
+  const setDefaultError = useCallback(
+    (err?: any) => {
+      setError({ body: err, message: t('tts.responseError', { ns: 'error' }), type: 500 });
+    },
+    [t],
+  );
+
+  const { isGlobalLoading, audio, start, stop, response } = useTTS(content, {
+    onError: (err) => {
+      stop();
+      setDefaultError(err);
+    },
+    onErrorRetry: (err) => {
+      stop();
+      setDefaultError(err);
+    },
+    onSuccess: async () => {
+      if (!response) return;
+      if (response.status === 200) return ttsMessage(id, true);
+      const message = await getMessageError(response);
+      if (message) {
+        setError(message);
+      } else {
+        setDefaultError();
+      }
+      stop();
+    },
+  });
+
+  // Starts synthesis at most once per mount; depends on `start` so the current one is called.
+  const handleInitStart = useCallback(() => {
+    if (isStart) return;
+    start();
+    setIsStart(true);
+  }, [isStart, start]);
+
+  const handleDelete = useCallback(() => {
+    stop();
+    clearTTS(id);
+  }, [clearTTS, id, stop]);
+
+  const handleRetry = useCallback(() => {
+    setError(undefined);
+    start();
+  }, [start]);
+
+  useEffect(() => {
+    if (!init) handleInitStart();
+  }, [init]);
+
+  return (
+    <Flexbox align={'center'} horizontal style={{ minWidth: 160, width: '100%' }}>
+      {error ? (
+        <Alert
+          action={
+            <Button onClick={handleRetry} size={'small'} type={'primary'}>
+              {t('retry', { ns: 'common' })}
+            </Button>
+          }
+          closable
+          extra={
+            error.body && (
+              <Highlighter copyButtonSize={'small'} language={'json'} type={'pure'}>
+                {JSON.stringify(error.body, null, 2)}
+              </Highlighter>
+            )
+          }
+          message={error.message}
+          onClose={handleDelete}
+          style={{ alignItems: 'center', width: '100%' }}
+          type="error"
+        />
+      ) : (
+        <>
+          <AudioPlayer
+            audio={audio}
+            buttonSize={'small'}
+            isLoading={isGlobalLoading}
+            onInitPlay={handleInitStart}
+            onLoadingStop={stop}
+            timeRender={'tag'}
+            timeStyle={{ margin: 0 }}
+          />
+          <ActionIcon
+            icon={TrashIcon}
+            onClick={handleDelete}
+            size={'small'}
+            title={t('tts.clear')}
+          />
+        </>
+      )}
+    </Flexbox>
+  );
+});
+
+export default TTS;
diff --git a/src/app/chat/features/Conversation/ChatList/Extras/User.tsx b/src/app/chat/features/Conversation/ChatList/Extras/User.tsx
index 420e2b6b11fa..0acc825ab38c 100644
--- a/src/app/chat/features/Conversation/ChatList/Extras/User.tsx
+++ b/src/app/chat/features/Conversation/ChatList/Extras/User.tsx
@@ -1,24 +1,31 @@
 import { RenderMessageExtra } from '@lobehub/ui';
-import { Divider } from 'antd';
 import { memo } from 'react';
-import { Flexbox } from 'react-layout-kit';
 
 import { useSessionStore } from '@/store/session';
+import { ChatMessage } from '@/types/chatMessage';
 
+import ExtraContainer from './ExtraContainer';
+import TTS from './TTS';
 import Translate from './Translate';
 
-export const UserMessageExtra: RenderMessageExtra = memo(({ extra, id }) => {
-  const hasTranslate = !!extra?.translate;
-
+export const UserMessageExtra: RenderMessageExtra = memo<ChatMessage>(({ extra, id, content }) => {
   const loading = useSessionStore((s) => s.chatLoadingId === id);
+
+  const showExtra = extra?.translate || extra?.tts;
+  if (!showExtra) return;
+
   return (
-    <Flexbox gap={8} style={{ marginTop: hasTranslate ? 8 : 0 }}>
+    <div style={{ marginTop: 8 }}>
+      {extra?.tts && (
+        <ExtraContainer>
+          <TTS content={content} id={id} loading={loading} {...extra?.tts} />
+        </ExtraContainer>
+      )}
       {extra?.translate && (
-        <div>
-          <Divider style={{ margin: '12px 0' }} />
-          <Translate id={id} {...extra.translate} loading={loading} />
-        </div>
+        <ExtraContainer>
+          <Translate id={id} {...extra?.translate} loading={loading} />
+        </ExtraContainer>
       )}
-    </Flexbox>
+    </div>
   );
 });
diff --git a/src/app/chat/settings/features/HeaderContent.tsx b/src/app/chat/settings/features/HeaderContent.tsx
index bc7f9bce114f..27e54e17178e 100644
--- a/src/app/chat/settings/features/HeaderContent.tsx
+++ b/src/app/chat/settings/features/HeaderContent.tsx
@@ -5,7 +5,7 @@ import { HardDriveDownload } from 'lucide-react';
 import { memo, useMemo } from 'react';
 import { useTranslation } from 'react-i18next';
 
-import { MOBILE_HEADER_ICON_SIZE } from '@/const/layoutTokens';
+import { HEADER_ICON_SIZE } from '@/const/layoutTokens';
 import { exportSingleAgent, exportSingleSession } from '@/helpers/export';
 import { useSessionStore } from '@/store/session';
 
@@ -41,13 +41,15 @@ export const HeaderContent = memo<{ mobile?: boolean }>(() => {
     [],
   );
 
-  const size = mobile ? MOBILE_HEADER_ICON_SIZE : { fontSize: 24 };
-
   return (
     <>
       <SubmitAgentButton />
       <Dropdown arrow={false} menu={{ items }} trigger={['click']}>
-        <ActionIcon icon={HardDriveDownload} size={size} title={t('export', { ns: 'common' })} />
+        <ActionIcon
+          icon={HardDriveDownload}
+          size={HEADER_ICON_SIZE(mobile)}
+          title={t('export', { ns: 'common' })}
+        />
       </Dropdown>
     </>
   );
diff --git a/src/app/chat/settings/features/SubmitAgentButton/index.tsx b/src/app/chat/settings/features/SubmitAgentButton/index.tsx
index f8d4693b5d97..97748c532c18 100644
--- a/src/app/chat/settings/features/SubmitAgentButton/index.tsx
+++ b/src/app/chat/settings/features/SubmitAgentButton/index.tsx
@@ -4,7 +4,7 @@ import { Share2 } from 'lucide-react';
 import { memo, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 
-import { MOBILE_HEADER_ICON_SIZE } from '@/const/layoutTokens';
+import { HEADER_ICON_SIZE } from '@/const/layoutTokens';
 
 import Inner from './Inner';
 
@@ -12,13 +12,12 @@ const SubmitAgentButton = memo(() => {
   const { t } = useTranslation('setting');
   const { mobile } = useResponsive();
   const [isModalOpen, setIsModalOpen] = useState(false);
-  const size = mobile ? MOBILE_HEADER_ICON_SIZE : { fontSize: 24 };
   return (
     <>
       <ActionIcon
         icon={Share2}
         onClick={() => setIsModalOpen(true)}
-        size={size}
+        size={HEADER_ICON_SIZE(mobile)}
         title={t('submitAgentModal.tooltips')}
       />
       <Modal
diff --git a/src/app/settings/features/SideBar/List.tsx b/src/app/settings/features/SideBar/List.tsx
index 69e77c9f9c2f..2ea92812378c 100644
--- a/src/app/settings/features/SideBar/List.tsx
+++ b/src/app/settings/features/SideBar/List.tsx
@@ -1,5 +1,5 @@
 import { useResponsive } from 'antd-style';
-import { Bot, Settings2, Webhook } from 'lucide-react';
+import { Bot, Mic2, Settings2, Webhook } from 'lucide-react';
 import Link from 'next/link';
 import { memo } from 'react';
 import { useTranslation } from 'react-i18next';
@@ -17,6 +17,7 @@ const List = memo(() => {
   const items = [
     { icon: Settings2, label: t('tab.common'), value: SettingsTabs.Common },
     { icon: Webhook, label: t('tab.llm'), value: SettingsTabs.LLM },
+    { icon: Mic2, label: t('tab.tts'), value: SettingsTabs.TTS },
     { icon: Bot, label: t('tab.agent'), value: SettingsTabs.Agent },
   ];
 
diff --git a/src/app/settings/features/SideBar/index.tsx b/src/app/settings/features/SideBar/index.tsx
index 3c0d8e10ce8a..2d25c6995ce4 100644
--- a/src/app/settings/features/SideBar/index.tsx
+++ b/src/app/settings/features/SideBar/index.tsx
@@ -30,8 +30,8 @@ const SideBar = memo(() => {
             <Logo className={styles.logo} extra={'Settings'} size={36} type={'text'} />
           </div>
         </Flexbox>
-        <UpgradeAlert />
         <Flexbox gap={2} style={{ paddingInline: 8 }}>
+          <UpgradeAlert />
           <List />
         </Flexbox>
       </DraggablePanelBody>
diff --git a/src/app/settings/features/UpgradeAlert.tsx b/src/app/settings/features/UpgradeAlert.tsx
index 1f5d07e6606d..595e2161342c 100644
--- a/src/app/settings/features/UpgradeAlert.tsx
+++ b/src/app/settings/features/UpgradeAlert.tsx
@@ -21,10 +21,10 @@ const UpgradeAlert = memo(() => {
             </Button>
           </Link>
         }
-        banner
         closable
         message={`✨ ${t('upgradeVersion.newVersion', { version: latestVersion })}`}
         showIcon={false}
+        style={{ marginBottom: 6 }}
         type={'info'}
       />
     )
diff --git a/src/app/settings/tts/TTS/index.tsx b/src/app/settings/tts/TTS/index.tsx
new file mode 100644
index 000000000000..af34580167ae
--- /dev/null
+++ b/src/app/settings/tts/TTS/index.tsx
@@ -0,0 +1,73 @@
+import { Form, type ItemGroup } from '@lobehub/ui';
+import { Form as AntForm, Select, Switch } from 'antd';
+import isEqual from 'fast-deep-equal';
+import { debounce } from 'lodash-es';
+import { Mic, Webhook } from 'lucide-react';
+import { memo } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import { FORM_STYLE } from '@/const/layoutTokens';
+import { settingsSelectors, useGlobalStore } from '@/store/global';
+
+import { opeanaiSTTOptions, opeanaiTTSOptions, sttOptions } from './options';
+
+type SettingItemGroup = ItemGroup;
+
+const TTS_SETTING_KEY = 'tts';
+
+const TTS = memo(() => {
+  const { t } = useTranslation('setting');
+  const [form] = AntForm.useForm();
+  const settings = useGlobalStore(settingsSelectors.currentSettings, isEqual);
+  const [setSettings] = useGlobalStore((s) => [s.setSettings]);
+
+  const stt: SettingItemGroup = {
+    children: [
+      {
+        children: <Select options={sttOptions} />,
+        desc: t('settingTTS.sttService.desc'),
+        label: t('settingTTS.sttService.title'),
+        name: [TTS_SETTING_KEY, 'sttServer'],
+      },
+      {
+        children: <Switch />,
+        desc: t('settingTTS.sttAutoStop.desc'),
+        label: t('settingTTS.sttAutoStop.title'),
+        minWidth: undefined,
+        name: [TTS_SETTING_KEY, 'sttAutoStop'],
+        valuePropName: 'checked',
+      },
+    ],
+    icon: Mic,
+    title: t('settingTTS.stt'),
+  };
+
+  const openai: SettingItemGroup = {
+    children: [
+      {
+        children: <Select options={opeanaiTTSOptions} />,
+        label: t('settingTTS.openai.ttsModel'),
+        name: [TTS_SETTING_KEY, 'openAI', 'ttsModel'],
+      },
+      {
+        children: <Select options={opeanaiSTTOptions} />,
+        label: t('settingTTS.openai.sttModel'),
+        name: [TTS_SETTING_KEY, 'openAI', 'sttModel'],
+      },
+    ],
+    icon: Webhook,
+    title: t('llm.OpenAI.title'),
+  };
+
+  return (
+    <Form
+      form={form}
+      initialValues={settings}
+      items={[stt, openai]}
+      onValuesChange={debounce(setSettings, 100)}
+      {...FORM_STYLE}
+    />
+  );
+});
+
+export default TTS;
diff --git a/src/app/settings/tts/TTS/options.ts b/src/app/settings/tts/TTS/options.ts
new file mode 100644
index 000000000000..c3e26a5847b7
--- /dev/null
+++ b/src/app/settings/tts/TTS/options.ts
@@ -0,0 +1,30 @@
+import { SelectProps } from 'antd';
+
+/**
+ * Options for the OpenAI text-to-speech model `Select` on the TTS settings form.
+ *
+ * Every entry uses the model id as both its label and its value.
+ * NOTE: the `opeanai` spelling is part of the exported name, so it is kept as-is here.
+ */
+export const opeanaiTTSOptions: SelectProps['options'] = [
+  { label: 'tts-1', value: 'tts-1' },
+  { label: 'tts-1-hd', value: 'tts-1-hd' },
+];
+
+/**
+ * Options for the OpenAI speech-to-text model `Select`; `whisper-1` is the only entry.
+ */
+export const opeanaiSTTOptions: SelectProps['options'] = [
+  { label: 'whisper-1', value: 'whisper-1' },
+];
+
+/**
+ * Options for the speech-to-text service `Select` on the TTS settings form.
+ *
+ * The stored values are `'openai'` and `'browser'`; the labels are the display names
+ * shown for them.
+ */
+export const sttOptions: SelectProps['options'] = [
+  { label: 'OpenAI', value: 'openai' },
+  { label: 'Browser', value: 'browser' },
+];
diff --git a/src/app/settings/tts/index.tsx b/src/app/settings/tts/index.tsx
new file mode 100644
index 000000000000..650d41a42f59
--- /dev/null
+++ b/src/app/settings/tts/index.tsx
@@ -0,0 +1,21 @@
+'use client';
+
+import { memo } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import PageTitle from '@/components/PageTitle';
+import { useSwitchSideBarOnInit } from '@/store/global/hooks/useSwitchSettingsOnInit';
+import { SettingsTabs } from '@/store/global/initialState';
+
+import TTS from './TTS';
+
+export default memo(() => {
+  useSwitchSideBarOnInit(SettingsTabs.TTS);
+  const { t } = useTranslation('setting'); // title below uses `tab.tts`, the TTS tab label
+  return (
+    <>
+      <PageTitle title={t('tab.tts')} />
+      <TTS />
+    </>
+  );
+});
diff --git a/src/app/settings/tts/layout.tsx b/src/app/settings/tts/layout.tsx
new file mode 100644
index 000000000000..781be56dc4f8
--- /dev/null
+++ b/src/app/settings/tts/layout.tsx
@@ -0,0 +1 @@
+export { default } from '../layout.server';
diff --git a/src/app/settings/tts/page.tsx b/src/app/settings/tts/page.tsx
new file mode 100644
index 000000000000..62784df84f41
--- /dev/null
+++ b/src/app/settings/tts/page.tsx
@@ -0,0 +1,3 @@
+import Index from './index';
+
+export default () => <Index />;
diff --git a/src/components/HotKeys/index.tsx b/src/components/HotKeys/index.tsx
index 2c3197abd73c..2f2a4238bd38 100644
--- a/src/components/HotKeys/index.tsx
+++ b/src/components/HotKeys/index.tsx
@@ -1,7 +1,7 @@
 'use client';
 
 import { createStyles } from 'antd-style';
-import { Fragment, memo, useEffect, useState } from 'react';
+import { memo, useEffect, useState } from 'react';
 import { Flexbox } from 'react-layout-kit';
 
 import { CLEAN_MESSAGE_KEY, PREFIX_KEY } from '@/const/hotkeys';
@@ -56,12 +56,9 @@ const HotKeys = memo<HotKeysProps>(({ keys, desc }) => {
   const content = (
     <Flexbox align={'center'} className={styles} gap={2} horizontal>
       {keysGroup.map((key, index) => (
-        <Fragment key={index}>
-          <kbd>
-            <span style={{ visibility }}>{key.toUpperCase()}</span>
-          </kbd>
-          {index + 1 < keysGroup.length && <span>+</span>}
-        </Fragment>
+        <kbd key={index}>
+          <span style={{ visibility }}>{key.toUpperCase()}</span>
+        </kbd>
       ))}
     </Flexbox>
   );
diff --git a/src/const/layoutTokens.ts b/src/const/layoutTokens.ts
index 441ec7c16e1d..87e177e3839c 100644
--- a/src/const/layoutTokens.ts
+++ b/src/const/layoutTokens.ts
@@ -14,3 +14,6 @@ export const FORM_STYLE: FormProps = {
   style: { maxWidth: MAX_WIDTH, width: '100%' },
 };
 export const MOBILE_HEADER_ICON_SIZE = { blockSize: 36, fontSize: 22 };
+export const DESKTOP_HEADER_ICON_SIZE = { fontSize: 24 };
+export const HEADER_ICON_SIZE = (mobile?: boolean) =>
+  mobile ? MOBILE_HEADER_ICON_SIZE : DESKTOP_HEADER_ICON_SIZE;
diff --git a/src/const/settings.ts b/src/const/settings.ts
index 3edbd147512a..1d14644b0319 100644
--- a/src/const/settings.ts
+++ b/src/const/settings.ts
@@ -2,12 +2,13 @@ import { getClientConfig } from '@/config/client';
 import { DEFAULT_OPENAI_MODEL_LIST } from '@/const/llm';
 import { DEFAULT_AGENT_META } from '@/const/meta';
 import { LanguageModel } from '@/types/llm';
-import { LobeAgentConfig } from '@/types/session';
+import { LobeAgentConfig, LobeAgentTTSConfig } from '@/types/session';
 import {
   GlobalBaseSettings,
   GlobalDefaultAgent,
   GlobalLLMConfig,
   GlobalSettings,
+  GlobalTTSConfig,
 } from '@/types/settings';
 
 export const DEFAULT_BASE_SETTINGS: GlobalBaseSettings = {
@@ -18,6 +19,15 @@ export const DEFAULT_BASE_SETTINGS: GlobalBaseSettings = {
   themeMode: 'auto',
 };
 
+// Default per-agent TTS settings: the OpenAI service with its `alloy` voice.
+// NOTE: the `DEFAUTT` spelling is part of the exported name and is kept as-is here.
+export const DEFAUTT_AGENT_TTS_CONFIG: LobeAgentTTSConfig = {
+  showAllLocaleVoice: false,
+  sttLocale: 'auto',
+  ttsService: 'openai',
+  voice: { openai: 'alloy' },
+};
+
 export const VISION_MODEL_DEFAULT_MAX_TOKENS = 1000;
 
 export const DEFAULT_AGENT_CONFIG: LobeAgentConfig = {
@@ -32,6 +42,7 @@ export const DEFAULT_AGENT_CONFIG: LobeAgentConfig = {
   },
   plugins: [],
   systemRole: '',
+  tts: DEFAUTT_AGENT_TTS_CONFIG,
 };
 
 export const DEFAULT_LLM_CONFIG: GlobalLLMConfig = {
@@ -48,8 +59,18 @@ export const DEFAULT_AGENT: GlobalDefaultAgent = {
   meta: DEFAULT_AGENT_META,
 };
 
+// Global TTS/STT defaults: OpenAI as the STT server, with `whisper-1` for speech-to-text and
+// `tts-1` for text-to-speech; `sttAutoStop` starts enabled. The model ids match the choices
+// offered on the TTS settings form.
+export const DEFAULT_TTS_CONFIG: GlobalTTSConfig = {
+  openAI: { sttModel: 'whisper-1', ttsModel: 'tts-1' },
+  sttAutoStop: true,
+  sttServer: 'openai',
+};
+
 export const DEFAULT_SETTINGS: GlobalSettings = {
   defaultAgent: DEFAULT_AGENT,
   languageModel: DEFAULT_LLM_CONFIG,
+  tts: DEFAULT_TTS_CONFIG,
   ...DEFAULT_BASE_SETTINGS,
 };
diff --git a/src/features/AgentSetting/AgentTTS/SelectWithTTSPreview.tsx b/src/features/AgentSetting/AgentTTS/SelectWithTTSPreview.tsx
new file mode 100644
index 000000000000..87e47583db18
--- /dev/null
+++ b/src/features/AgentSetting/AgentTTS/SelectWithTTSPreview.tsx
@@ -0,0 +1,119 @@
+import { AudioPlayer } from '@lobehub/tts/react';
+import { Alert, Highlighter } from '@lobehub/ui';
+import { Button, RefSelectProps, Select, SelectProps } from 'antd';
+import { useTheme } from 'antd-style';
+import { forwardRef, useCallback, useState } from 'react';
+import { useTranslation } from 'react-i18next';
+import { Flexbox } from 'react-layout-kit';
+
+import { useTTS } from '@/hooks/useTTS';
+import { ChatMessageError } from '@/types/chatMessage';
+import { TTSServer } from '@/types/session';
+import { getMessageError } from '@/utils/fetch';
+
+interface SelectWithTTSPreviewProps extends SelectProps {
+  server: TTSServer;
+}
+
+const SelectWithTTSPreview = forwardRef<RefSelectProps, SelectWithTTSPreviewProps>(
+  ({ value, options, server, onSelect, ...rest }, ref) => {
+    const [error, setError] = useState<ChatMessageError>();
+    const [voice, setVoice] = useState<string>(value);
+    const { t } = useTranslation('welcome');
+    const theme = useTheme();
+    const PREVIEW_TEXT = ['Lobe Chat', t('slogan.title'), t('slogan.desc1')].join('. ');
+
+    const setDefaultError = useCallback(
+      (err?: any) => {
+        setError({ body: err, message: t('tts.responseError', { ns: 'error' }), type: 500 });
+      },
+      [t],
+    );
+
+    const { isGlobalLoading, audio, stop, start, response, setText } = useTTS(PREVIEW_TEXT, {
+      onError: (err) => {
+        stop();
+        setDefaultError(err);
+      },
+      onErrorRetry: (err) => {
+        stop();
+        setDefaultError(err);
+      },
+      onSuccess: async () => {
+        if (!response) return;
+        if (response.status === 200) return;
+        const message = await getMessageError(response);
+        if (message) {
+          setError(message);
+        } else {
+          setDefaultError();
+        }
+        stop();
+      },
+      server,
+      voice,
+    });
+
+    const handleCloseError = useCallback(() => {
+      setError(undefined);
+      stop();
+    }, [stop]);
+
+    const handleRetry = useCallback(() => {
+      setError(undefined);
+      stop();
+      start();
+    }, [stop, start]);
+
+    const handleSelect: SelectProps['onSelect'] = (value, option) => {
+      stop();
+      setVoice(value as string);
+      setText([PREVIEW_TEXT, option?.label].join(' - '));
+      onSelect?.(value, option);
+    };
+    return (
+      <Flexbox gap={8}>
+        <Flexbox align={'center'} gap={8} horizontal style={{ width: '100%' }}>
+          <Select onSelect={handleSelect} options={options} ref={ref} value={value} {...rest} />
+          <AudioPlayer
+            allowPause={false}
+            audio={audio}
+            buttonActive
+            buttonSize={{ blockSize: 36, fontSize: 16 }}
+            buttonStyle={{ border: `1px solid ${theme.colorBorder}` }}
+            isLoading={isGlobalLoading}
+            onInitPlay={start}
+            onLoadingStop={stop}
+            showSlider={false}
+            showTime={false}
+            style={{ flex: 'none', padding: 0, width: 'unset' }}
+            title={t('settingTTS.voice.preview', { ns: 'setting' })}
+          />
+        </Flexbox>
+        {error && (
+          <Alert
+            action={
+              <Button onClick={handleRetry} size={'small'} type={'primary'}>
+                {t('retry', { ns: 'common' })}
+              </Button>
+            }
+            closable
+            extra={
+              error.body && (
+                <Highlighter copyButtonSize={'small'} language={'json'} type={'pure'}>
+                  {JSON.stringify(error.body, null, 2)}
+                </Highlighter>
+              )
+            }
+            message={error.message}
+            onClose={handleCloseError}
+            style={{ alignItems: 'center', width: '100%' }}
+            type="error"
+          />
+        )}
+      </Flexbox>
+    );
+  },
+);
+
+export default SelectWithTTSPreview;
diff --git a/src/features/AgentSetting/AgentTTS/index.tsx b/src/features/AgentSetting/AgentTTS/index.tsx
new file mode 100644
index 000000000000..44449a45aa53
--- /dev/null
+++ b/src/features/AgentSetting/AgentTTS/index.tsx
@@ -0,0 +1,129 @@
+import { VoiceList } from '@lobehub/tts';
+import { Form, ItemGroup } from '@lobehub/ui';
+import { Form as AForm, Select, Switch } from 'antd';
+import isEqual from 'fast-deep-equal';
+import { debounce } from 'lodash-es';
+import { Mic } from 'lucide-react';
+import { memo, useEffect, useMemo } from 'react';
+import { useTranslation } from 'react-i18next';
+
+import { FORM_STYLE } from '@/const/layoutTokens';
+import SelectWithTTSPreview from '@/features/AgentSetting/AgentTTS/SelectWithTTSPreview';
+import { settingsSelectors, useGlobalStore } from '@/store/global';
+
+import { useStore } from '../store';
+import { ttsOptions } from './options';
+
+const TTS_SETTING_KEY = 'tts';
+const { openaiVoiceOptions, localeOptions } = VoiceList;
+
+/**
+ * Agent settings section for speech: TTS service, per-service voice (with
+ * preview) and the speech-recognition locale.
+ *
+ * Form values mirror the agent config held in the settings store; edits are
+ * written back through `setAgentConfig`, debounced by 100 ms.
+ */
+const AgentTTS = memo(() => {
+  const { t } = useTranslation('setting');
+  const updateConfig = useStore((s) => s.setAgentConfig);
+  const [form] = AForm.useForm();
+  // Select the locale value itself: a selector that returns a new closure on
+  // every run never compares equal, so every store update forced a re-render.
+  const locale = useGlobalStore(settingsSelectors.currentLanguage);
+  const config = useStore((s) => s.config, isEqual);
+
+  useEffect(() => {
+    form.setFieldsValue(config);
+  }, [config, form]);
+
+  const showAllLocaleVoice = config.tts.showAllLocaleVoice;
+
+  // `showAllLocaleVoice` on: build the list without a locale; off: for the current language.
+  const { edgeVoiceOptions, microsoftVoiceOptions } = useMemo(
+    () => new VoiceList(showAllLocaleVoice ? undefined : locale),
+    [locale, showAllLocaleVoice],
+  );
+
+  // One debounced writer for the component's lifetime: creating it inline gives
+  // each render its own timer, so calls across renders are never coalesced.
+  const handleValuesChange = useMemo(() => debounce(updateConfig, 100), [updateConfig]);
+
+  const tts: ItemGroup = {
+    children: [
+      {
+        children: <Select options={ttsOptions} />,
+        desc: t('settingTTS.ttsService.desc'),
+        label: t('settingTTS.ttsService.title'),
+        name: [TTS_SETTING_KEY, 'ttsService'],
+      },
+      {
+        children: <Switch />,
+        desc: t('settingTTS.showAllLocaleVoice.desc'),
+        hidden: config.tts.ttsService === 'openai',
+        label: t('settingTTS.showAllLocaleVoice.title'),
+        minWidth: undefined,
+        name: [TTS_SETTING_KEY, 'showAllLocaleVoice'],
+        valuePropName: 'checked',
+      },
+      {
+        children: <SelectWithTTSPreview options={openaiVoiceOptions} server={'openai'} />,
+        desc: t('settingTTS.voice.desc'),
+        hidden: config.tts.ttsService !== 'openai',
+        label: t('settingTTS.voice.title'),
+        name: [TTS_SETTING_KEY, 'voice', 'openai'],
+      },
+      {
+        children: <SelectWithTTSPreview options={edgeVoiceOptions} server={'edge'} />,
+        desc: t('settingTTS.voice.desc'),
+        divider: false,
+        hidden: config.tts.ttsService !== 'edge',
+        label: t('settingTTS.voice.title'),
+        name: [TTS_SETTING_KEY, 'voice', 'edge'],
+      },
+      {
+        children: <SelectWithTTSPreview options={microsoftVoiceOptions} server={'microsoft'} />,
+        desc: t('settingTTS.voice.desc'),
+        divider: false,
+        hidden: config.tts.ttsService !== 'microsoft',
+        label: t('settingTTS.voice.title'),
+        name: [TTS_SETTING_KEY, 'voice', 'microsoft'],
+      },
+      {
+        children: (
+          <Select
+            options={[
+              { label: t('settingTheme.lang.autoMode'), value: 'auto' },
+              ...(localeOptions || []),
+            ]}
+          />
+        ),
+        desc: t('settingTTS.sttLocale.desc'),
+        label: t('settingTTS.sttLocale.title'),
+        name: [TTS_SETTING_KEY, 'sttLocale'],
+      },
+    ],
+    icon: Mic,
+    title: t('settingTTS.title'),
+  };
+
+  return (
+    <Form
+      form={form}
+      initialValues={{
+        [TTS_SETTING_KEY]: {
+          voice: {
+            edge: edgeVoiceOptions?.[0]?.value,
+            microsoft: microsoftVoiceOptions?.[0]?.value,
+            openai: openaiVoiceOptions?.[0]?.value,
+          },
+        },
+      }}
+      items={[tts]}
+      onValuesChange={handleValuesChange}
+      {...FORM_STYLE}
+    />
+  );
+});
+
+export default AgentTTS;
diff --git a/src/features/AgentSetting/AgentTTS/options.ts b/src/features/AgentSetting/AgentTTS/options.ts
new file mode 100644
index 000000000000..e5205afe4b9e
--- /dev/null
+++ b/src/features/AgentSetting/AgentTTS/options.ts
@@ -0,0 +1,14 @@
+import { SelectProps } from 'antd';
+
+// [label, value] pairs for the selectable TTS services, in display order.
+const TTS_SERVICES = [
+  ['OpenAI', 'openai'],
+  ['Edge Speech', 'edge'],
+  ['Microsoft Speech', 'microsoft'],
+] as const;
+
+/** Options for the TTS service `Select`: one `{ label, value }` entry per service. */
+export const ttsOptions: SelectProps['options'] = TTS_SERVICES.map(([label, value]) => ({
+  label,
+  value,
+}));
diff --git a/src/features/AgentSetting/index.tsx b/src/features/AgentSetting/index.tsx
index 7ce997af1b3c..9d5a39c8da6c 100644
--- a/src/features/AgentSetting/index.tsx
+++ b/src/features/AgentSetting/index.tsx
@@ -4,6 +4,7 @@ import AgentConfig from './AgentConfig';
 import AgentMeta from './AgentMeta';
 import AgentPlugin from './AgentPlugin';
 import AgentPrompt from './AgentPrompt';
+import AgentTTS from './AgentTTS';
 import StoreUpdater, { StoreUpdaterProps } from './StoreUpdater';
 import { Provider, createStore } from './store';
 
@@ -13,10 +14,10 @@ const AgentSettings = memo<AgentSettingsProps>((props) => {
   return (
     <Provider createStore={createStore}>
       <StoreUpdater {...props} />
-
       <AgentPrompt />
       <AgentMeta />
       <AgentConfig />
+      <AgentTTS />
       <AgentPlugin />
     </Provider>
   );
diff --git a/src/hooks/useSTT.ts b/src/hooks/useSTT.ts
new file mode 100644
index 000000000000..98a27dc8eecd
--- /dev/null
+++ b/src/hooks/useSTT.ts
@@ -0,0 +1,76 @@
+import { getRecordMineType } from '@lobehub/tts';
+import {
+  OpenAISTTOptions,
+  SpeechRecognitionOptions,
+  useOpenAISTT,
+  useSpeechRecognition,
+} from '@lobehub/tts/react';
+import isEqual from 'fast-deep-equal';
+import { SWRConfiguration } from 'swr';
+
+import { createHeaderWithOpenAI } from '@/services/_header';
+import { OPENAI_URLS } from '@/services/_url';
+import { settingsSelectors, useGlobalStore } from '@/store/global';
+import { useSessionStore } from '@/store/session';
+import { agentSelectors } from '@/store/session/slices/agentConfig';
+
+interface STTConfig extends SWRConfiguration {
+  onTextChange: (value: string) => void;
+}
+
+/**
+ * Speech-to-text hook that delegates to the recogniser selected by the global
+ * `sttServer` setting: `useOpenAISTT` for `'openai'`, otherwise `useSpeechRecognition`.
+ *
+ * The recognition locale is the agent's `sttLocale`, unless that is unset or
+ * `'auto'`, in which case the current language from the global store is used.
+ *
+ * NOTE(review): the delegated hook differs per `sttServer`; confirm consumers
+ * remount when that setting changes so React's hook order stays stable.
+ *
+ * @param config SWR options plus `onTextChange`, forwarded to the selected hook.
+ * @returns the result of the selected recogniser hook.
+ */
+export const useSTT = (config: STTConfig) => {
+  const ttsSettings = useGlobalStore(settingsSelectors.currentTTS, isEqual);
+  const ttsAgentSettings = useSessionStore(agentSelectors.currentAgentTTS, isEqual);
+  const locale = useGlobalStore(settingsSelectors.currentLanguage);
+
+  const autoStop = ttsSettings.sttAutoStop;
+  const sttLocale =
+    ttsAgentSettings?.sttLocale && ttsAgentSettings.sttLocale !== 'auto'
+      ? ttsAgentSettings.sttLocale
+      : locale;
+
+  let useSelectedSTT;
+  let options: any = {};
+
+  switch (ttsSettings.sttServer) {
+    case 'openai': {
+      useSelectedSTT = useOpenAISTT;
+      options = {
+        api: {
+          headers: createHeaderWithOpenAI(),
+          serviceUrl: OPENAI_URLS.stt,
+        },
+        autoStop,
+        options: {
+          mineType: getRecordMineType(),
+          model: ttsSettings.openAI.sttModel,
+        },
+      } as OpenAISTTOptions;
+      break;
+    }
+    // 'browser', plus any unrecognised persisted value: without a fallback the
+    // hook reference below would stay undefined and the call would throw.
+    default: {
+      useSelectedSTT = useSpeechRecognition;
+      options = {
+        autoStop,
+      } as SpeechRecognitionOptions;
+      break;
+    }
+  }
+
+  return useSelectedSTT(sttLocale, { ...config, ...options });
+};
diff --git a/src/hooks/useTTS.ts b/src/hooks/useTTS.ts
new file mode 100644
index 000000000000..a6e74887ee29
--- /dev/null
+++ b/src/hooks/useTTS.ts
@@ -0,0 +1,105 @@
+import { VoiceList } from '@lobehub/tts';
+import {
+  EdgeSpeechOptions,
+  MicrosoftSpeechOptions,
+  OpenAITTSOptions,
+  useEdgeSpeech,
+  useMicrosoftSpeech,
+  useOpenAITTS,
+} from '@lobehub/tts/react';
+import isEqual from 'fast-deep-equal';
+import { SWRConfiguration } from 'swr';
+
+import { createHeaderWithOpenAI } from '@/services/_header';
+import { OPENAI_URLS, TTS_URL } from '@/services/_url';
+import { settingsSelectors, useGlobalStore } from '@/store/global';
+import { useSessionStore } from '@/store/session';
+import { agentSelectors } from '@/store/session/slices/agentConfig';
+import { TTSServer } from '@/types/session';
+
+interface TTSConfig extends SWRConfiguration {
+  server?: TTSServer;
+  voice?: string;
+}
+
+/**
+ * Text-to-speech hook that delegates to the OpenAI, Edge or Microsoft speech hook.
+ *
+ * The service is `config.server` when given, otherwise the agent's `ttsService`.
+ * The voice is resolved in order: `config.voice`, the agent's voice for that
+ * service, then the first entry of that service's voice list (if any).
+ *
+ * @param content text handed to the selected speech hook.
+ * @param config optional SWR options plus `server` / `voice` overrides.
+ * @returns the result of the selected speech hook.
+ */
+export const useTTS = (content: string, config?: TTSConfig) => {
+  const ttsSettings = useGlobalStore(settingsSelectors.currentTTS, isEqual);
+  const ttsAgentSettings = useSessionStore(agentSelectors.currentAgentTTS, isEqual);
+  // Subscribe to the locale value only: selecting `new VoiceList(...)` directly
+  // yields a fresh object on every run, so every store update forced a re-render.
+  const locale = useGlobalStore(settingsSelectors.currentLanguage);
+  const voiceList = new VoiceList(locale);
+
+  let useSelectedTTS;
+  let options: any = {};
+  switch (config?.server || ttsAgentSettings.ttsService) {
+    case 'edge': {
+      useSelectedTTS = useEdgeSpeech;
+      options = {
+        api: {
+          /**
+           * @description client fetch
+           * serviceUrl: TTS_URL.edge,
+           */
+        },
+        options: {
+          voice:
+            config?.voice ||
+            ttsAgentSettings.voice.edge ||
+            voiceList.edgeVoiceOptions?.[0]?.value,
+        },
+      } as EdgeSpeechOptions;
+      break;
+    }
+    case 'microsoft': {
+      useSelectedTTS = useMicrosoftSpeech;
+      options = {
+        api: {
+          serviceUrl: TTS_URL.microsoft,
+        },
+        options: {
+          voice:
+            config?.voice ||
+            ttsAgentSettings.voice.microsoft ||
+            voiceList.microsoftVoiceOptions?.[0]?.value,
+        },
+      } as MicrosoftSpeechOptions;
+      break;
+    }
+    // 'openai', plus any unrecognised value: without a fallback the hook
+    // reference below would stay undefined and the call would throw.
+    default: {
+      useSelectedTTS = useOpenAITTS;
+      options = {
+        api: {
+          headers: createHeaderWithOpenAI(),
+          serviceUrl: OPENAI_URLS.tts,
+        },
+        options: {
+          model: ttsSettings.openAI.ttsModel,
+          voice:
+            config?.voice ||
+            ttsAgentSettings.voice.openai ||
+            VoiceList.openaiVoiceOptions?.[0]?.value,
+        },
+      } as OpenAITTSOptions;
+      break;
+    }
+  }
+
+  return useSelectedTTS(content, {
+    ...config,
+    ...options,
+  });
+};
diff --git a/src/locales/default/chat.ts b/src/locales/default/chat.ts
index f7835d6e1a3e..d8d8d769f8b1 100644
--- a/src/locales/default/chat.ts
+++ b/src/locales/default/chat.ts
@@ -38,6 +38,11 @@ export default {
     withSystemRole: '包含助手角色设定',
   },
   stop: '停止',
+  stt: {
+    action: '语音输入',
+    loading: '识别中...',
+    prettifying: '润色中...',
+  },
   temp: '临时',
   tokenDetail: '角色设定: {{systemRoleToken}} · 历史消息: {{chatsToken}}',
   tokenTag: {
@@ -58,9 +63,13 @@ export default {
     title: '话题列表',
   },
   translate: {
+    action: '翻译',
     clear: '删除翻译',
   },
-  translateTo: '翻译',
+  tts: {
+    action: '语音朗读',
+    clear: '删除语音',
+  },
   updateAgent: '更新助理信息',
   upload: {
     actionTooltip: '上传图片',
diff --git a/src/locales/default/error.ts b/src/locales/default/error.ts
index 36a01728c8df..dbeda5bc9fe9 100644
--- a/src/locales/default/error.ts
+++ b/src/locales/default/error.ts
@@ -35,6 +35,12 @@ export default {
     NoAPIKey: 'OpenAI API Key 为空，请添加自定义 OpenAI API Key',
     /* eslint-enable */
   },
+  stt: {
+    responseError: '服务请求失败，请检查配置或重试',
+  },
+  tts: {
+    responseError: '服务请求失败，请检查配置或重试',
+  },
   unlock: {
     apikey: {
       addProxyUrl: '添加 OpenAI 代理地址（可选）',
diff --git a/src/locales/default/setting.ts b/src/locales/default/setting.ts
index 229e6a8c689a..fca37441dbf2 100644
--- a/src/locales/default/setting.ts
+++ b/src/locales/default/setting.ts
@@ -109,7 +109,6 @@ export default {
     },
     title: '助手信息',
   },
-
   settingChat: {
     chatStyleType: {
       title: '聊天窗口样式',
@@ -195,6 +194,40 @@ export default {
     },
     title: '系统设置',
   },
+  settingTTS: {
+    openai: {
+      sttModel: 'OpenAI 语音识别模型',
+      ttsModel: 'OpenAI 语音合成模型',
+    },
+    showAllLocaleVoice: {
+      desc: '关闭则只显示当前语种的声源',
+      title: '显示所有语种声源',
+    },
+    stt: '语音识别设置',
+    sttAutoStop: {
+      desc: '关闭后，语音识别将不会自动结束，需要手动点击结束按钮',
+      title: '自动结束语音识别',
+    },
+    sttLocale: {
+      desc: '语音输入的语种，此选项可提高语音识别准确率',
+      title: '语音识别语种',
+    },
+    sttService: {
+      desc: '其中 browser 为浏览器原生的语音识别服务',
+      title: '语音识别服务',
+    },
+    title: '语音服务',
+    tts: '语音合成设置',
+    ttsService: {
+      desc: '如使用 OpenAI 语音合成服务，需要保证 OpenAI 模型服务已开启',
+      title: '语音合成服务',
+    },
+    voice: {
+      desc: '为当前助手挑选一个声音，不同 TTS 服务支持的声源不同',
+      preview: '试听声源',
+      title: '语音合成声源',
+    },
+  },
   settingTheme: {
     avatar: {
       title: '头像',
@@ -234,5 +267,6 @@ export default {
     agent: '默认助手',
     common: '通用设置',
     llm: '语言模型',
+    tts: '语音服务',
   },
 };
diff --git a/src/services/_url.ts b/src/services/_url.ts
index 9521cad7745b..f0190427acdb 100644
--- a/src/services/_url.ts
+++ b/src/services/_url.ts
@@ -6,4 +6,11 @@ export const URLS = {
 export const OPENAI_URLS = {
   chat: '/api/openai/chat',
   models: '/api/openai/models',
+  stt: '/api/openai/stt',
+  tts: '/api/openai/tts',
+};
+
+export const TTS_URL = {
+  edge: '/api/tts/edge-speech',
+  microsoft: '/api/tts/microsoft-speech',
 };
diff --git a/src/store/global/initialState.ts b/src/store/global/initialState.ts
index 13a8b1ae3cdb..136510f30ad8 100644
--- a/src/store/global/initialState.ts
+++ b/src/store/global/initialState.ts
@@ -11,6 +11,7 @@ export enum SettingsTabs {
   Agent = 'agent',
   Common = 'common',
   LLM = 'llm',
+  TTS = 'tts',
 }
 
 export interface Guide {
diff --git a/src/store/global/selectors/settings.test.ts b/src/store/global/selectors/settings.test.ts
index 1f679d2d0373..50bdb685de4c 100644
--- a/src/store/global/selectors/settings.test.ts
+++ b/src/store/global/selectors/settings.test.ts
@@ -20,12 +20,28 @@ describe('settingsSelectors', () => {
               systemRole: '',
               model: LanguageModel.GPT3_5,
               params: {},
+              tts: {
+                showAllLocaleVoice: false,
+                sttLocale: 'auto',
+                ttsService: 'openai',
+                voice: {
+                  openai: 'alloy',
+                },
+              },
             },
             meta: {
               avatar: 'Default Agent',
               description: 'Default agent for testing',
             },
           },
+          tts: {
+            openAI: {
+              sttModel: 'whisper-1',
+              ttsModel: 'tts-1',
+            },
+            sttAutoStop: true,
+            sttServer: 'openai',
+          },
           languageModel: {
             openAI: {
               OPENAI_API_KEY: 'openai-api-key',
@@ -59,12 +75,28 @@ describe('settingsSelectors', () => {
               top_p: 1,
             },
             plugins: [],
+            tts: {
+              showAllLocaleVoice: false,
+              sttLocale: 'auto',
+              ttsService: 'openai',
+              voice: {
+                openai: 'alloy',
+              },
+            },
           },
           meta: {
             avatar: 'Default Agent',
             description: 'Default agent for testing',
           },
         },
+        tts: {
+          openAI: {
+            sttModel: 'whisper-1',
+            ttsModel: 'tts-1',
+          },
+          sttAutoStop: true,
+          sttServer: 'openai',
+        },
         languageModel: {
           openAI: {
             OPENAI_API_KEY: 'openai-api-key',
diff --git a/src/store/global/selectors/settings.ts b/src/store/global/selectors/settings.ts
index cce79b932363..388a166414b6 100644
--- a/src/store/global/selectors/settings.ts
+++ b/src/store/global/selectors/settings.ts
@@ -1,7 +1,12 @@
 import { DEFAULT_OPENAI_MODEL_LIST } from '@/const/llm';
 import { DEFAULT_LANG } from '@/const/locale';
 import { DEFAULT_AGENT_META } from '@/const/meta';
-import { DEFAULT_AGENT, DEFAULT_AGENT_CONFIG, DEFAULT_SETTINGS } from '@/const/settings';
+import {
+  DEFAULT_AGENT,
+  DEFAULT_AGENT_CONFIG,
+  DEFAULT_SETTINGS,
+  DEFAULT_TTS_CONFIG,
+} from '@/const/settings';
 import { Locales } from '@/locales/resources';
 import { GlobalSettings } from '@/types/settings';
 import { isOnServerSide } from '@/utils/env';
@@ -11,6 +16,8 @@ import { GlobalStore } from '../store';
 
 const currentSettings = (s: GlobalStore) => merge(DEFAULT_SETTINGS, s.settings);
 
+const currentTTS = (s: GlobalStore) => merge(DEFAULT_TTS_CONFIG, s.settings.tts);
+
 const defaultAgent = (s: GlobalStore) => merge(DEFAULT_AGENT, s.settings.defaultAgent);
 
 const defaultAgentConfig = (s: GlobalStore) => merge(DEFAULT_AGENT_CONFIG, defaultAgent(s).config);
@@ -49,6 +56,7 @@ const currentLanguage = (s: GlobalStore) => {
 export const settingsSelectors = {
   currentLanguage,
   currentSettings,
+  currentTTS,
   defaultAgent,
   defaultAgentConfig,
   defaultAgentMeta,
diff --git a/src/store/session/slices/agentConfig/selectors.ts b/src/store/session/slices/agentConfig/selectors.ts
index 3c5833aa81e4..20ef94a91d8f 100644
--- a/src/store/session/slices/agentConfig/selectors.ts
+++ b/src/store/session/slices/agentConfig/selectors.ts
@@ -2,9 +2,11 @@ import { t } from 'i18next';
 
 import { DEFAULT_OPENAI_MODEL_LIST, VISION_MODEL_WHITE_LIST } from '@/const/llm';
 import { DEFAULT_AVATAR, DEFAULT_BACKGROUND_COLOR } from '@/const/meta';
+import { DEFAUTT_AGENT_TTS_CONFIG } from '@/const/settings';
 import { SessionStore } from '@/store/session';
 import { LanguageModel } from '@/types/llm';
 import { MetaData } from '@/types/meta';
+import { LobeAgentTTSConfig } from '@/types/session';
 import { merge } from '@/utils/merge';
 
 import { sessionSelectors } from '../session/selectors';
@@ -78,6 +80,12 @@ const showTokenTag = (s: SessionStore) => {
   return DEFAULT_OPENAI_MODEL_LIST.includes(model);
 };
 
+// TTS settings of the current agent; falls back to the shared default config when unset.
+const currentAgentTTS = (s: SessionStore): LobeAgentTTSConfig => {
+  const agentTTS = currentAgentConfig(s)?.tts;
+  return agentTTS || DEFAUTT_AGENT_TTS_CONFIG;
+};
+
 export const agentSelectors = {
   currentAgentAvatar,
   currentAgentBackgroundColor,
@@ -87,6 +95,7 @@ export const agentSelectors = {
   currentAgentModel,
   currentAgentPlugins,
   currentAgentSystemRole,
+  currentAgentTTS,
   currentAgentTitle,
   getAvatar,
   getDescription,
diff --git a/src/store/session/slices/chat/actions/translate.ts b/src/store/session/slices/chat/actions/translate.ts
index 87e30617eaca..9c1776f3c7a9 100644
--- a/src/store/session/slices/chat/actions/translate.ts
+++ b/src/store/session/slices/chat/actions/translate.ts
@@ -16,13 +16,15 @@ const t = setNamespace('chat/translate');
  * 翻译事件
  */
 export interface ChatTranslateAction {
-  clearTranslate: (id: string) => void;
+  clearTTS: (id: string) => void;
 
+  clearTranslate: (id: string) => void;
   /**
    * 翻译消息
    * @param id
    */
   translateMessage: (id: string, targetLang: string) => Promise<void>;
+  ttsMessage: (id: string, init?: boolean) => void;
 }
 
 export const chatTranslate: StateCreator<
@@ -31,6 +33,15 @@ export const chatTranslate: StateCreator<
   [],
   ChatTranslateAction
 > = (set, get) => ({
+  /**
+   * Clears the TTS state of a message by dispatching `updateMessageExtra`
+   * with a null value for the `tts` key.
+   */
+  clearTTS: (id) => {
+    const { dispatchMessage } = get();
+    dispatchMessage({ id, key: 'tts', type: 'updateMessageExtra', value: null });
+  },
+
   clearTranslate: (id) => {
     get().dispatchMessage({
       id,
@@ -85,4 +96,16 @@ export const chatTranslate: StateCreator<
 
     toggleChatLoading(false);
   },
+
+  // Store `{ init }` under the message's `tts` extra key.
+  // `init` is coerced with Boolean(), so an omitted argument is saved as `false`.
+  ttsMessage: (id, init) => {
+    const ttsState = { init: Boolean(init) };
+    get().dispatchMessage({
+      id,
+      key: 'tts',
+      type: 'updateMessageExtra',
+      value: ttsState,
+    });
+  },
 });
diff --git a/src/store/session/slices/chat/selectors/utils.test.ts b/src/store/session/slices/chat/selectors/utils.test.ts
index 9439869170b1..7477d0feea6a 100644
--- a/src/store/session/slices/chat/selectors/utils.test.ts
+++ b/src/store/session/slices/chat/selectors/utils.test.ts
@@ -41,6 +41,14 @@ beforeEach(() => {
         temperature: 0.6,
       },
       systemRole: '',
+      tts: {
+        ttsService: 'openai',
+        sttLocale: 'auto',
+        showAllLocaleVoice: false,
+        voice: {
+          openai: 'alloy',
+        },
+      },
     },
     type: 'agent',
     createAt: 1690110700808,
@@ -215,6 +223,14 @@ describe('organizeChats', () => {
         params: {
           temperature: 0.6,
         },
+        tts: {
+          ttsService: 'openai',
+          sttLocale: 'auto',
+          showAllLocaleVoice: false,
+          voice: {
+            openai: 'alloy',
+          },
+        },
         systemRole: '',
       },
       createAt: 1690110700808,
diff --git a/src/store/session/slices/session/reducers/session.test.ts b/src/store/session/slices/session/reducers/session.test.ts
index 9ce78745d773..a0c5324e7913 100644
--- a/src/store/session/slices/session/reducers/session.test.ts
+++ b/src/store/session/slices/session/reducers/session.test.ts
@@ -312,6 +312,14 @@ describe('sessionsReducer', () => {
             model: 'gpt-3.5-turbo',
             params: {},
             systemRole: 'system-role',
+            tts: {
+              showAllLocaleVoice: false,
+              sttLocale: 'auto',
+              ttsService: 'openai',
+              voice: {
+                openai: 'alloy',
+              },
+            },
           },
           type: 'agent',
           meta: {
@@ -354,6 +362,14 @@ describe('sessionsReducer', () => {
             model: 'gpt-3.5-turbo',
             params: {},
             systemRole: 'system',
+            tts: {
+              showAllLocaleVoice: false,
+              sttLocale: 'auto',
+              ttsService: 'openai',
+              voice: {
+                openai: 'alloy',
+              },
+            },
           },
         } as LobeAgentSession,
         session2: {
@@ -379,6 +395,14 @@ describe('sessionsReducer', () => {
         draft.session1.config = {
           model: LanguageModel.GPT4,
           params: {},
+          tts: {
+            ttsService: 'openai',
+            sttLocale: 'auto',
+            showAllLocaleVoice: false,
+            voice: {
+              openai: 'alloy',
+            },
+          },
           systemRole: 'system',
         };
       });
diff --git a/src/types/chatMessage.ts b/src/types/chatMessage.ts
index 5d8ee8b115c6..ef5f35e9e9cf 100644
--- a/src/types/chatMessage.ts
+++ b/src/types/chatMessage.ts
@@ -22,6 +22,10 @@ export interface OpenAIFunctionCall {
 export interface ChatTranslate extends Translate {
   content?: string;
 }
+
+export interface ChatTTS {
+  init?: boolean;
+}
 export interface ChatMessage extends BaseDataModel {
   /**
    * @title 内容
@@ -34,6 +38,8 @@ export interface ChatMessage extends BaseDataModel {
     fromModel?: string;
     // 翻译
     translate?: ChatTranslate;
+    // TTS
+    tts?: ChatTTS;
   } & Record<string, any>;
 
   files?: string[];
diff --git a/src/types/session.ts b/src/types/session.ts
index 08d747ebdc94..3cc1ee2b935c 100644
--- a/src/types/session.ts
+++ b/src/types/session.ts
@@ -34,6 +34,19 @@ interface LobeSessionBase extends BaseDataModel {
   type: LobeSessionType;
 }
 
+export type TTSServer = 'openai' | 'edge' | 'microsoft';
+
+export interface LobeAgentTTSConfig {
+  showAllLocaleVoice?: boolean;
+  sttLocale: 'auto' | string;
+  ttsService: TTSServer;
+  voice: {
+    edge?: string;
+    microsoft?: string;
+    openai: string;
+  };
+}
+
 export interface LobeAgentConfig {
   compressThreshold?: number;
   displayMode?: 'chat' | 'docs';
@@ -72,6 +85,10 @@ export interface LobeAgentConfig {
    * 系统角色
    */
   systemRole: string;
+  /**
+   * 语音服务
+   */
+  tts: LobeAgentTTSConfig;
 }
 
 /**
diff --git a/src/types/settings.ts b/src/types/settings.ts
index 68f0a1fe5557..b09677b7b084 100644
--- a/src/types/settings.ts
+++ b/src/types/settings.ts
@@ -57,9 +57,19 @@ export interface OpenAIConfig {
   useAzure?: boolean;
 }
 
-export type GlobalLLMConfig = {
+export interface GlobalLLMConfig {
   openAI: OpenAIConfig;
-};
+}
+
+export type STTServer = 'openai' | 'browser';
+export interface GlobalTTSConfig {
+  openAI: {
+    sttModel: 'whisper-1';
+    ttsModel: 'tts-1' | 'tts-1-hd';
+  };
+  sttAutoStop: boolean;
+  sttServer: STTServer;
+}
 
 export type LLMBrand = keyof GlobalLLMConfig;
 
@@ -69,6 +79,7 @@ export type LLMBrand = keyof GlobalLLMConfig;
 export interface GlobalSettings extends GlobalBaseSettings {
   defaultAgent: GlobalDefaultAgent;
   languageModel: GlobalLLMConfig;
+  tts: GlobalTTSConfig;
 }
 
 export type ConfigKeys = keyof GlobalSettings;