@@ -21,6 +21,16 @@ public interface IKuromojiTokenizer : ITokenizer
21
21
[ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
22
22
bool ? DiscardPunctuation { get ; set ; }
23
23
24
+ /// <summary>
25
+ /// Whether original compound tokens should be discarded from the output with
26
+ /// <see cref="KuromojiTokenizationMode.Search"/> <see cref="Mode"/>. Defaults to <c>false</c>.
27
+ /// <para />
28
+ /// Valid in Elasticsearch 7.9.0+
29
+ /// </summary>
30
+ [ DataMember ( Name = "discard_compound_token" ) ]
31
+ [ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
32
+ bool ? DiscardCompoundToken { get ; set ; }
33
+
24
34
/// <summary>
25
35
/// The tokenization mode determines how the tokenizer handles compound and unknown words.
26
36
/// </summary>
@@ -64,6 +74,9 @@ public class KuromojiTokenizer : TokenizerBase, IKuromojiTokenizer
64
74
/// <inheritdoc />
65
75
public bool ? DiscardPunctuation { get ; set ; }
66
76
77
+ /// <inheritdoc />
78
+ public bool ? DiscardCompoundToken { get ; set ; }
79
+
67
80
/// <inheritdoc />
68
81
public KuromojiTokenizationMode ? Mode { get ; set ; }
69
82
@@ -86,32 +99,35 @@ public class KuromojiTokenizerDescriptor
86
99
{
87
100
protected override string Type => "kuromoji_tokenizer" ;
88
101
bool ? IKuromojiTokenizer . DiscardPunctuation { get ; set ; }
89
-
102
+ bool ? IKuromojiTokenizer . DiscardCompoundToken { get ; set ; }
90
103
KuromojiTokenizationMode ? IKuromojiTokenizer . Mode { get ; set ; }
91
104
int ? IKuromojiTokenizer . NBestCost { get ; set ; }
92
105
string IKuromojiTokenizer . NBestExamples { get ; set ; }
93
106
string IKuromojiTokenizer . UserDictionary { get ; set ; }
94
107
IEnumerable < string > IKuromojiTokenizer . UserDictionaryRules { get ; set ; }
95
108
96
- /// <inheritdoc />
109
+ /// <inheritdoc cref="IKuromojiTokenizer.Mode" />
97
110
public KuromojiTokenizerDescriptor Mode(KuromojiTokenizationMode? mode) =>
	Assign(mode, (tokenizer, value) => tokenizer.Mode = value);
98
111
99
- /// <inheritdoc />
112
+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardPunctuation" />
100
113
public KuromojiTokenizerDescriptor DiscardPunctuation(bool? discard = true) =>
	Assign(discard, (tokenizer, value) => tokenizer.DiscardPunctuation = value);
101
114
102
- /// <inheritdoc />
115
+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardCompoundToken" />
116
+ public KuromojiTokenizerDescriptor DiscardCompoundToken(bool? discard = true) => Assign(discard, (tokenizer, value) => tokenizer.DiscardCompoundToken = value);
117
+
118
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionary" />
103
119
public KuromojiTokenizerDescriptor UserDictionary(string userDictionary) =>
	Assign(userDictionary, (tokenizer, value) => tokenizer.UserDictionary = value);
104
120
105
- /// <inheritdoc />
121
+ /// <inheritdoc cref="IKuromojiTokenizer.NBestExamples" />
106
122
public KuromojiTokenizerDescriptor NBestExamples(string examples) =>
	Assign(examples, (tokenizer, value) => tokenizer.NBestExamples = value);
107
123
108
- /// <inheritdoc />
124
+ /// <inheritdoc cref="IKuromojiTokenizer.NBestCost" />
109
125
public KuromojiTokenizerDescriptor NBestCost(int? cost) =>
	Assign(cost, (tokenizer, value) => tokenizer.NBestCost = value);
110
126
111
- /// <inheritdoc />
127
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
112
128
public KuromojiTokenizerDescriptor UserDictionaryRules(IEnumerable<string> rules) =>
	// Assign via the lambda's own value parameter, matching the sibling setters,
	// so the delegate no longer captures the outer `rules` argument.
	Assign(rules, (a, v) => a.UserDictionaryRules = v);
113
129
114
- /// <inheritdoc />
130
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
115
131
public KuromojiTokenizerDescriptor UserDictionaryRules(params string[] rules) =>
	// Assign via the lambda's own value parameter, matching the sibling setters,
	// so the delegate no longer captures the outer `rules` argument.
	Assign(rules, (a, v) => a.UserDictionaryRules = v);
116
132
}
117
133
}
0 commit comments