@@ -30,6 +30,12 @@ public interface INoriTokenizer : ITokenizer
30
30
[ DataMember ( Name = "decompound_mode" ) ]
31
31
NoriDecompoundMode ? DecompoundMode { get ; set ; }
32
32
33
+ /// <summary>
34
+ /// Whether punctuation should be discarded from the output. Defaults to <c>true</c>.
35
+ /// </summary>
36
+ [ DataMember ( Name = "discard_punctuation" ) ]
37
+ bool ? DiscardPunctuation { get ; set ; }
38
+
33
39
/// <summary>
34
40
/// The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG) may be
35
41
/// appended to
@@ -57,6 +63,9 @@ public class NoriTokenizer : TokenizerBase, INoriTokenizer
57
63
/// <inheritdoc cref="INoriTokenizer.DecompoundMode" />
58
64
public NoriDecompoundMode ? DecompoundMode { get ; set ; }
59
65
66
+ /// <inheritdoc cref="INoriTokenizer.DiscardPunctuation" />
67
+ public bool ? DiscardPunctuation { get ; set ; }
68
+
60
69
/// <inheritdoc cref="INoriTokenizer.UserDictionary" />
61
70
public string UserDictionary { get ; set ; }
62
71
@@ -73,6 +82,7 @@ public class NoriTokenizerDescriptor
73
82
NoriDecompoundMode ? INoriTokenizer . DecompoundMode { get ; set ; }
74
83
string INoriTokenizer . UserDictionary { get ; set ; }
75
84
IEnumerable < string > INoriTokenizer . UserDictionaryRules { get ; set ; }
85
+ bool ? INoriTokenizer . DiscardPunctuation { get ; set ; }
76
86
77
87
/// <inheritdoc cref="INoriTokenizer.DecompoundMode" />
78
88
// Hands the mode to Assign together with a setter that stores it in DecompoundMode.
public NoriTokenizerDescriptor DecompoundMode(NoriDecompoundMode? mode) =>
    Assign(mode, (tokenizer, value) => tokenizer.DecompoundMode = value);
@@ -85,5 +95,8 @@ public class NoriTokenizerDescriptor
85
95
86
96
/// <inheritdoc cref="INoriTokenizer.UserDictionaryRules" />
87
97
// Hands the rules to Assign together with a setter that stores them in UserDictionaryRules.
public NoriTokenizerDescriptor UserDictionaryRules(IEnumerable<string> rules) =>
    Assign(rules, (tokenizer, value) => tokenizer.UserDictionaryRules = value);
98
+
99
+ /// <inheritdoc cref="INoriTokenizer.DiscardPunctuation" />
100
+ // Hands the flag (true when the caller omits it) to Assign with a setter that stores it in DiscardPunctuation.
+ public NoriTokenizerDescriptor DiscardPunctuation(bool? discard = true) =>
+     Assign(discard, (tokenizer, value) => tokenizer.DiscardPunctuation = value);
88
101
}
89
102
}
0 commit comments