37
37
import org .apache .lucene .analysis .cjk .CJKAnalyzer ;
38
38
import org .apache .lucene .analysis .cjk .CJKBigramFilter ;
39
39
import org .apache .lucene .analysis .cjk .CJKWidthFilter ;
40
+ import org .apache .lucene .analysis .ckb .SoraniAnalyzer ;
40
41
import org .apache .lucene .analysis .ckb .SoraniNormalizationFilter ;
41
42
import org .apache .lucene .analysis .commongrams .CommonGramsFilter ;
42
43
import org .apache .lucene .analysis .core .DecimalDigitFilter ;
52
53
import org .apache .lucene .analysis .de .GermanAnalyzer ;
53
54
import org .apache .lucene .analysis .de .GermanNormalizationFilter ;
54
55
import org .apache .lucene .analysis .de .GermanStemFilter ;
56
+ import org .apache .lucene .analysis .el .GreekAnalyzer ;
55
57
import org .apache .lucene .analysis .en .EnglishAnalyzer ;
56
58
import org .apache .lucene .analysis .en .KStemFilter ;
57
59
import org .apache .lucene .analysis .en .PorterStemFilter ;
60
+ import org .apache .lucene .analysis .es .SpanishAnalyzer ;
58
61
import org .apache .lucene .analysis .eu .BasqueAnalyzer ;
62
+ import org .apache .lucene .analysis .fa .PersianAnalyzer ;
59
63
import org .apache .lucene .analysis .fa .PersianNormalizationFilter ;
60
64
import org .apache .lucene .analysis .fi .FinnishAnalyzer ;
61
65
import org .apache .lucene .analysis .fr .FrenchAnalyzer ;
66
+ import org .apache .lucene .analysis .ga .IrishAnalyzer ;
62
67
import org .apache .lucene .analysis .gl .GalicianAnalyzer ;
68
+ import org .apache .lucene .analysis .hi .HindiAnalyzer ;
63
69
import org .apache .lucene .analysis .hi .HindiNormalizationFilter ;
70
+ import org .apache .lucene .analysis .hu .HungarianAnalyzer ;
64
71
import org .apache .lucene .analysis .hy .ArmenianAnalyzer ;
72
+ import org .apache .lucene .analysis .id .IndonesianAnalyzer ;
65
73
import org .apache .lucene .analysis .in .IndicNormalizationFilter ;
74
+ import org .apache .lucene .analysis .it .ItalianAnalyzer ;
75
+ import org .apache .lucene .analysis .lt .LithuanianAnalyzer ;
76
+ import org .apache .lucene .analysis .lv .LatvianAnalyzer ;
66
77
import org .apache .lucene .analysis .miscellaneous .ASCIIFoldingFilter ;
67
78
import org .apache .lucene .analysis .miscellaneous .DisableGraphAttribute ;
68
79
import org .apache .lucene .analysis .miscellaneous .KeywordRepeatFilter ;
79
90
import org .apache .lucene .analysis .ngram .NGramTokenFilter ;
80
91
import org .apache .lucene .analysis .ngram .NGramTokenizer ;
81
92
import org .apache .lucene .analysis .nl .DutchAnalyzer ;
93
+ import org .apache .lucene .analysis .no .NorwegianAnalyzer ;
82
94
import org .apache .lucene .analysis .path .PathHierarchyTokenizer ;
83
95
import org .apache .lucene .analysis .pattern .PatternTokenizer ;
84
96
import org .apache .lucene .analysis .payloads .DelimitedPayloadTokenFilter ;
85
97
import org .apache .lucene .analysis .payloads .TypeAsPayloadTokenFilter ;
98
+ import org .apache .lucene .analysis .pt .PortugueseAnalyzer ;
86
99
import org .apache .lucene .analysis .reverse .ReverseStringFilter ;
100
+ import org .apache .lucene .analysis .ro .RomanianAnalyzer ;
101
+ import org .apache .lucene .analysis .ru .RussianAnalyzer ;
87
102
import org .apache .lucene .analysis .shingle .ShingleFilter ;
88
103
import org .apache .lucene .analysis .snowball .SnowballFilter ;
89
104
import org .apache .lucene .analysis .standard .ClassicFilter ;
90
105
import org .apache .lucene .analysis .standard .ClassicTokenizer ;
91
106
import org .apache .lucene .analysis .standard .StandardAnalyzer ;
92
107
import org .apache .lucene .analysis .standard .UAX29URLEmailTokenizer ;
108
+ import org .apache .lucene .analysis .sv .SwedishAnalyzer ;
109
+ import org .apache .lucene .analysis .th .ThaiAnalyzer ;
93
110
import org .apache .lucene .analysis .th .ThaiTokenizer ;
94
111
import org .apache .lucene .analysis .tr .ApostropheFilter ;
112
+ import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
95
113
import org .apache .lucene .analysis .util .ElisionFilter ;
96
114
import org .elasticsearch .common .logging .DeprecationLogger ;
97
115
import org .elasticsearch .common .logging .Loggers ;
@@ -130,6 +148,8 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
130
148
analyzers .put ("standard_html_strip" , StandardHtmlStripAnalyzerProvider ::new );
131
149
analyzers .put ("pattern" , PatternAnalyzerProvider ::new );
132
150
analyzers .put ("snowball" , SnowballAnalyzerProvider ::new );
151
+
152
+ // Language analyzers:
133
153
analyzers .put ("arabic" , ArabicAnalyzerProvider ::new );
134
154
analyzers .put ("armenian" , ArmenianAnalyzerProvider ::new );
135
155
analyzers .put ("basque" , BasqueAnalyzerProvider ::new );
@@ -147,6 +167,24 @@ public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAn
147
167
analyzers .put ("french" , FrenchAnalyzerProvider ::new );
148
168
analyzers .put ("galician" , GalicianAnalyzerProvider ::new );
149
169
analyzers .put ("german" , GermanAnalyzerProvider ::new );
170
+ analyzers .put ("greek" , GreekAnalyzerProvider ::new );
171
+ analyzers .put ("hindi" , HindiAnalyzerProvider ::new );
172
+ analyzers .put ("hungarian" , HungarianAnalyzerProvider ::new );
173
+ analyzers .put ("indonesian" , IndonesianAnalyzerProvider ::new );
174
+ analyzers .put ("irish" , IrishAnalyzerProvider ::new );
175
+ analyzers .put ("italian" , ItalianAnalyzerProvider ::new );
176
+ analyzers .put ("latvian" , LatvianAnalyzerProvider ::new );
177
+ analyzers .put ("lithuanian" , LithuanianAnalyzerProvider ::new );
178
+ analyzers .put ("norwegian" , NorwegianAnalyzerProvider ::new );
179
+ analyzers .put ("persian" , PersianAnalyzerProvider ::new );
180
+ analyzers .put ("portuguese" , PortugueseAnalyzerProvider ::new );
181
+ analyzers .put ("romanian" , RomanianAnalyzerProvider ::new );
182
+ analyzers .put ("russian" , RussianAnalyzerProvider ::new );
183
+ analyzers .put ("sorani" , SoraniAnalyzerProvider ::new );
184
+ analyzers .put ("spanish" , SpanishAnalyzerProvider ::new );
185
+ analyzers .put ("swedish" , SwedishAnalyzerProvider ::new );
186
+ analyzers .put ("turkish" , TurkishAnalyzerProvider ::new );
187
+ analyzers .put ("thai" , ThaiAnalyzerProvider ::new );
150
188
return analyzers ;
151
189
}
152
190
@@ -248,13 +286,15 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
248
286
@ Override
249
287
public List <PreBuiltAnalyzerProviderFactory > getPreBuiltAnalyzerProviderFactories () {
250
288
List <PreBuiltAnalyzerProviderFactory > analyzers = new ArrayList <>();
251
- analyzers .add (new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , CachingStrategy .LUCENE ,
289
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("standard_html_strip" , CachingStrategy .ELASTICSEARCH ,
252
290
() -> new StandardHtmlStripAnalyzer (CharArraySet .EMPTY_SET )));
253
291
// Pre-built "pattern" analyzer. The regex is the non-word pattern \W+ (Java literal "\\W+");
// a space between the escaped backslash and W would instead match a literal space followed by 'W's.
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH,
    () -> new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
        CharArraySet.EMPTY_SET)));
256
294
analyzers .add (new PreBuiltAnalyzerProviderFactory ("snowball" , CachingStrategy .LUCENE ,
257
295
() -> new SnowballAnalyzer ("English" , StopAnalyzer .ENGLISH_STOP_WORDS_SET )));
296
+
297
+ // Language analyzers:
258
298
analyzers .add (new PreBuiltAnalyzerProviderFactory ("arabic" , CachingStrategy .LUCENE , ArabicAnalyzer ::new ));
259
299
analyzers .add (new PreBuiltAnalyzerProviderFactory ("armenian" , CachingStrategy .LUCENE , ArmenianAnalyzer ::new ));
260
300
analyzers .add (new PreBuiltAnalyzerProviderFactory ("basque" , CachingStrategy .LUCENE , BasqueAnalyzer ::new ));
@@ -263,7 +303,7 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
263
303
analyzers .add (new PreBuiltAnalyzerProviderFactory ("bulgarian" , CachingStrategy .LUCENE , BulgarianAnalyzer ::new ));
264
304
analyzers .add (new PreBuiltAnalyzerProviderFactory ("catalan" , CachingStrategy .LUCENE , CatalanAnalyzer ::new ));
265
305
// chinese analyzer: only for old indices, best effort
266
- analyzers .add (new PreBuiltAnalyzerProviderFactory ("chinese" , CachingStrategy .LUCENE , StandardAnalyzer ::new ));
306
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("chinese" , CachingStrategy .ONE , StandardAnalyzer ::new ));
267
307
analyzers .add (new PreBuiltAnalyzerProviderFactory ("cjk" , CachingStrategy .LUCENE , CJKAnalyzer ::new ));
268
308
analyzers .add (new PreBuiltAnalyzerProviderFactory ("czech" , CachingStrategy .LUCENE , CzechAnalyzer ::new ));
269
309
analyzers .add (new PreBuiltAnalyzerProviderFactory ("danish" , CachingStrategy .LUCENE , DanishAnalyzer ::new ));
@@ -273,6 +313,24 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
273
313
analyzers .add (new PreBuiltAnalyzerProviderFactory ("french" , CachingStrategy .LUCENE , FrenchAnalyzer ::new ));
274
314
analyzers .add (new PreBuiltAnalyzerProviderFactory ("galician" , CachingStrategy .LUCENE , GalicianAnalyzer ::new ));
275
315
analyzers .add (new PreBuiltAnalyzerProviderFactory ("german" , CachingStrategy .LUCENE , GermanAnalyzer ::new ));
316
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("greek" , CachingStrategy .LUCENE , GreekAnalyzer ::new ));
317
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("hindi" , CachingStrategy .LUCENE , HindiAnalyzer ::new ));
318
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("hungarian" , CachingStrategy .LUCENE , HungarianAnalyzer ::new ));
319
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("indonesian" , CachingStrategy .LUCENE , IndonesianAnalyzer ::new ));
320
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("irish" , CachingStrategy .LUCENE , IrishAnalyzer ::new ));
321
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("italian" , CachingStrategy .LUCENE , ItalianAnalyzer ::new ));
322
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("latvian" , CachingStrategy .LUCENE , LatvianAnalyzer ::new ));
323
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("lithuanian" , CachingStrategy .LUCENE , LithuanianAnalyzer ::new ));
324
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("norwegian" , CachingStrategy .LUCENE , NorwegianAnalyzer ::new ));
325
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("persian" , CachingStrategy .LUCENE , PersianAnalyzer ::new ));
326
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("portuguese" , CachingStrategy .LUCENE , PortugueseAnalyzer ::new ));
327
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("romanian" , CachingStrategy .LUCENE , RomanianAnalyzer ::new ));
328
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("russian" , CachingStrategy .LUCENE , RussianAnalyzer ::new ));
329
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("sorani" , CachingStrategy .LUCENE , SoraniAnalyzer ::new ));
330
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("spanish" , CachingStrategy .LUCENE , SpanishAnalyzer ::new ));
331
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("swedish" , CachingStrategy .LUCENE , SwedishAnalyzer ::new ));
332
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("turkish" , CachingStrategy .LUCENE , TurkishAnalyzer ::new ));
333
+ analyzers .add (new PreBuiltAnalyzerProviderFactory ("thai" , CachingStrategy .LUCENE , ThaiAnalyzer ::new ));
276
334
return analyzers ;
277
335
}
278
336
0 commit comments