Skip to content

Commit 231e3af

Browse files
authored
LUCENE-9687: Hunspell suggestions: reduce work in the findSimilarDictionaryEntries loop (#2451)
The loop body runs once per dictionary entry, so per-iteration allocations and loop-invariant method calls are hoisted out of it or reused.
1 parent 19e6560 commit 231e3af

File tree

1 file changed

+8
-10
lines changed

1 file changed

+8
-10
lines changed

lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/GeneratingSuggester.java

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,9 @@ private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
6565
String word, WordCase originalCase) {
6666
Comparator<Weighted<Root<String>>> natural = Comparator.naturalOrder();
6767
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(natural.reversed());
68+
List<Root<String>> entries = new ArrayList<>();
69+
boolean ignoreTitleCaseRoots = originalCase == WordCase.LOWER && !dictionary.hasLanguage("de");
70+
EnumSet<NGramOptions> options = EnumSet.of(NGramOptions.LONGER_WORSE);
6871

6972
IntsRefFSTEnum<IntsRef> fstEnum = new IntsRefFSTEnum<>(dictionary.words);
7073
InputOutput<IntsRef> mapping;
@@ -78,18 +81,15 @@ private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
7881
}
7982

8083
String root = toString(key);
81-
List<Root<String>> entries = filterSuitableEntries(root, mapping.output);
84+
filterSuitableEntries(root, mapping.output, entries);
8285
if (entries.isEmpty()) continue;
8386

84-
if (originalCase == WordCase.LOWER
85-
&& WordCase.caseOf(root) == WordCase.TITLE
86-
&& !dictionary.hasLanguage("de")) {
87+
if (ignoreTitleCaseRoots && WordCase.caseOf(root) == WordCase.TITLE) {
8788
continue;
8889
}
8990

9091
String lower = dictionary.toLowerCase(root);
91-
int sc =
92-
ngram(3, word, lower, EnumSet.of(NGramOptions.LONGER_WORSE)) + commonPrefix(word, root);
92+
int sc = ngram(3, word, lower, options) + commonPrefix(word, root);
9393

9494
if (roots.size() == MAX_ROOTS && sc < roots.peek().score) {
9595
continue;
@@ -129,8 +129,8 @@ private static String toString(IntsRef key) {
129129
return new String(chars);
130130
}
131131

132-
private List<Root<String>> filterSuitableEntries(String word, IntsRef forms) {
133-
List<Root<String>> result = new ArrayList<>();
132+
private void filterSuitableEntries(String word, IntsRef forms, List<Root<String>> result) {
133+
result.clear();
134134
for (int i = 0; i < forms.length; i += dictionary.formStep()) {
135135
int entryId = forms.ints[forms.offset + i];
136136
if (dictionary.hasFlag(entryId, dictionary.forbiddenword)
@@ -141,8 +141,6 @@ private List<Root<String>> filterSuitableEntries(String word, IntsRef forms) {
141141
}
142142
result.add(new Root<>(word, entryId));
143143
}
144-
145-
return result;
146144
}
147145

148146
private List<Weighted<String>> expandRoots(

0 commit comments

Comments
 (0)