Skip to content

Commit

Permalink
🐛 fix: Fix text split
Browse files (browse the repository at this point in the history)
  • Loading branch information
canisminor1990 committed Nov 25, 2024
1 parent 5939f0a commit 2e6c1bc
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 31 deletions.
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"dependencies": {
"@babel/runtime": "^7.26.0",
"lodash-es": "^4.17.21",
"markdown-to-txt": "^2.0.1",
"query-string": "^9.1.1",
"react-error-boundary": "^4.1.2",
"remark-gfm": "^3.0.1",
Expand All @@ -80,7 +81,7 @@
},
"devDependencies": {
"@commitlint/cli": "^19.6.0",
"@lobehub/i18n-cli": "^1.20.0",
"@lobehub/i18n-cli": "^1.20.1",
"@lobehub/lint": "^1.24.4",
"@types/lodash-es": "^4.17.12",
"@types/node": "^20.17.7",
Expand All @@ -93,7 +94,7 @@
"commitlint": "^19.6.0",
"concurrently": "^9.1.0",
"dumi": "^2.4.14",
"dumi-theme-lobehub": "^1.10.6",
"dumi-theme-lobehub": "^1.10.8",
"eslint": "^8.57.1",
"father": "^4.5.1",
"husky": "^9.1.7",
Expand Down
52 changes: 32 additions & 20 deletions src/core/utils/splitTextIntoSegments.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { markdownToTxt } from 'markdown-to-txt';

const toHalfWidthAndCleanSpace = (str: string): string => {
return str
return markdownToTxt(str)
.replaceAll(/[\uFF01-\uFF5E]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xFE_E0))
.replaceAll('\u3000', ' ')
.replaceAll('。', '.')
Expand All @@ -22,32 +24,42 @@ const toHalfWidthAndCleanSpace = (str: string): string => {
.replaceAll(/\s+/g, ' ');
};

export const splitTextIntoSegments = (text: string, maxChars: number = 100): string[] => {
export const splitTextIntoSegments = (text: string, chunkSize: number = 100): string[] => {
text = toHalfWidthAndCleanSpace(text);

const sentences = text.match(/[^!.;?]+[!.;?]+/g) || [];
const segments: string[] = [];
let currentSegment = '';
const chunks: string[] = [];
const paragraphs = text.split('\n');
let currentChunk = '';

sentences.forEach((sentence) => {
if ((currentSegment + sentence).length > maxChars) {
if (currentSegment.length > 0) {
segments.push(currentSegment.trim());
currentSegment = '';
}
if (sentence.length > maxChars) {
segments.push(sentence.trim());
} else {
currentSegment = sentence;
function addChunk(chunk: string) {
if (chunk.trim()) {
chunks.push(chunk.trim());
}
}

for (const paragraph of paragraphs) {
if (currentChunk.length + paragraph.length + 1 > chunkSize && currentChunk.length > 0) {
addChunk(currentChunk);
currentChunk = '';
}

if (paragraph.length > chunkSize) {
const sentences = paragraph.match(/[^!.?]+[!.?]+/g) || [paragraph];
for (const sentence of sentences) {
if (currentChunk.length + sentence.length + 1 > chunkSize && currentChunk.length > 0) {
addChunk(currentChunk);
currentChunk = '';
}
currentChunk += (currentChunk ? ' ' : '') + sentence.trim();
}
} else {
currentSegment += sentence;
currentChunk += (currentChunk ? '\n' : '') + paragraph;
}
});
}

if (currentSegment.length > 0) {
segments.push(currentSegment.trim());
if (currentChunk) {
addChunk(currentChunk);
}

return segments.filter(Boolean);
return chunks;
};
16 changes: 8 additions & 8 deletions src/react/useEdgeSpeech/demos/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,19 @@ import { Button, Input } from 'antd';
import { Volume2 } from 'lucide-react';
import { Flexbox } from 'react-layout-kit';

import { EDGE_SPEECH_BACKEND_URL } from '../../_util/api';
import { genLevaOptions } from '../../_util/leva';

const defaultText = '这是一段使用 Edge Speech 的语音演示';

export default () => {
const store = useCreateStore();

const api: any = useControls(
{
serviceUrl: EDGE_SPEECH_BACKEND_URL,
},
{ store },
);
// const api: any = useControls(
// {
// serviceUrl: EDGE_SPEECH_BACKEND_URL,
// },
// { store },
// );

const options: any = useControls(
{
Expand All @@ -31,9 +30,10 @@ export default () => {
);

const { setText, isGlobalLoading, start, stop, audio } = useEdgeSpeech(defaultText, {
api,
// api,
options,
});

return (
<StoryBook levaStore={store}>
<Flexbox gap={8}>
Expand Down
1 change: 1 addition & 0 deletions src/react/useEdgeSpeech/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const useEdgeSpeech = (defaultText: string, init: EdgeSpeechOptions) => {
options.voice,
text,
async (segmentText: string) => {
console.log(segmentText);
const instance = new EdgeSpeechTTS({ ...api, locale });
const res = await instance.create({ input: segmentText, options });
setResponse(res);
Expand Down
3 changes: 2 additions & 1 deletion src/react/useTTS/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export const useTTS = (
}, [handleReset]);

const { isLoading, error, mutate } = useSWR(
shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]] : null,
shouldFetch && textArray?.length > 0 ? [key, textArray?.[index]].join('-') : null,
async () => await fetchTTS(textArray[index]),
{
onError: (err, ...rest) => {
Expand Down Expand Up @@ -81,6 +81,7 @@ export const useTTS = (

useEffect(() => {
const texts = splitTextIntoSegments(text);

handleReset(texts);
return () => {
handleReset();
Expand Down

0 comments on commit 2e6c1bc

Please sign in to comment.