@@ -21,6 +21,14 @@ public interface IKuromojiTokenizer : ITokenizer
21
21
[ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
22
22
bool ? DiscardPunctuation { get ; set ; }
23
23
24
+ /// <summary>
25
+ /// Whether original compound tokens should be discarded from the output with
26
+ /// <see cref="KuromojiTokenizationMode.Search"/> <see cref="Mode"/>. Defaults to <c>false</c>.
27
+ /// </summary>
28
+ [ DataMember ( Name = "discard_compound_token" ) ]
29
+ [ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
30
+ bool ? DiscardCompoundToken { get ; set ; }
31
+
24
32
/// <summary>
25
33
/// The tokenization mode determines how the tokenizer handles compound and unknown words.
26
34
/// </summary>
@@ -64,6 +72,9 @@ public class KuromojiTokenizer : TokenizerBase, IKuromojiTokenizer
64
72
/// <inheritdoc />
65
73
public bool ? DiscardPunctuation { get ; set ; }
66
74
75
+ /// <inheritdoc />
76
+ public bool ? DiscardCompoundToken { get ; set ; }
77
+
67
78
/// <inheritdoc />
68
79
public KuromojiTokenizationMode ? Mode { get ; set ; }
69
80
@@ -86,32 +97,35 @@ public class KuromojiTokenizerDescriptor
86
97
{
87
98
protected override string Type => "kuromoji_tokenizer" ;
88
99
bool ? IKuromojiTokenizer . DiscardPunctuation { get ; set ; }
89
-
100
+ bool ? IKuromojiTokenizer . DiscardCompoundToken { get ; set ; }
90
101
KuromojiTokenizationMode ? IKuromojiTokenizer . Mode { get ; set ; }
91
102
int ? IKuromojiTokenizer . NBestCost { get ; set ; }
92
103
string IKuromojiTokenizer . NBestExamples { get ; set ; }
93
104
string IKuromojiTokenizer . UserDictionary { get ; set ; }
94
105
IEnumerable < string > IKuromojiTokenizer . UserDictionaryRules { get ; set ; }
95
106
96
- /// <inheritdoc />
107
+ /// <inheritdoc cref="IKuromojiTokenizer.Mode" />
97
108
public KuromojiTokenizerDescriptor Mode(KuromojiTokenizationMode? mode) => Assign(mode, (tokenizer, value) => tokenizer.Mode = value);
98
109
99
- /// <inheritdoc />
110
+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardPunctuation" />
100
111
public KuromojiTokenizerDescriptor DiscardPunctuation(bool? discard = true) => Assign(discard, (tokenizer, value) => tokenizer.DiscardPunctuation = value);
101
112
102
- /// <inheritdoc />
113
+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardCompoundToken" />
114
+ public KuromojiTokenizerDescriptor DiscardCompoundToken(bool? discard = true) => Assign(discard, (tokenizer, value) => tokenizer.DiscardCompoundToken = value);
115
+
116
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionary" />
103
117
public KuromojiTokenizerDescriptor UserDictionary(string userDictionary) => Assign(userDictionary, (tokenizer, value) => tokenizer.UserDictionary = value);
104
118
105
- /// <inheritdoc />
119
+ /// <inheritdoc cref="IKuromojiTokenizer.NBestExamples" />
106
120
public KuromojiTokenizerDescriptor NBestExamples(string examples) => Assign(examples, (tokenizer, value) => tokenizer.NBestExamples = value);
107
121
108
- /// <inheritdoc />
122
+ /// <inheritdoc cref="IKuromojiTokenizer.NBestCost" />
109
123
public KuromojiTokenizerDescriptor NBestCost(int? cost) => Assign(cost, (tokenizer, value) => tokenizer.NBestCost = value);
110
124
111
- /// <inheritdoc />
125
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
112
126
public KuromojiTokenizerDescriptor UserDictionaryRules(IEnumerable<string> rules) => Assign(rules, (a, v) => a.UserDictionaryRules = v);
113
127
114
- /// <inheritdoc />
128
+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
115
129
public KuromojiTokenizerDescriptor UserDictionaryRules(params string[] rules) => Assign(rules, (a, v) => a.UserDictionaryRules = v);
116
130
}
117
131
}
0 commit comments