Skip to content

Commit

Permalink
fix: fix a perf issue where zh words were cut unnecessarily and repeatedly
Browse files (browse the repository at this point in the history)
  • Loading branch information
weareoutman committed Oct 9, 2024
1 parent 3eddfab commit 870dc88
Showing 1 changed file with 14 additions and 17 deletions.
31 changes: 14 additions & 17 deletions docusaurus-search-local/src/client/utils/smartTerms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,26 @@ export function smartTerms(
tokens: string[],
zhDictionary: string[]
): SmartTerm[] {
const terms: SmartTerm[] = [];
const tokenTerms = tokens.map((token) => {
if (/\p{Unified_Ideograph}/u.test(token)) {
return cutZhWords(token, zhDictionary);
} else {
return [{ value: token }];
}
});

function cutMixedWords(subTokens: string[], carry: SmartTerm): void {
if (subTokens.length === 0) {
// Get all possible combinations of terms.
const terms: SmartTerm[] = [];
function combine(index: number, carry: SmartTerm): void {
if (index === tokenTerms.length) {
terms.push(carry);
return;
}
const token = subTokens[0];
if (/\p{Unified_Ideograph}/u.test(token)) {
const terms = cutZhWords(token, zhDictionary);
for (const term of terms) {
const nextCarry = carry.concat(...term);
cutMixedWords(subTokens.slice(1), nextCarry);
}
} else {
const nextCarry = carry.concat({
value: token,
});
cutMixedWords(subTokens.slice(1), nextCarry);
for (const term of tokenTerms[index]) {
combine(index + 1, carry.concat(term));
}
}

cutMixedWords(tokens, []);
combine(0, []);

return terms;
}

0 comments on commit 870dc88

Please sign in to comment.