@@ -92,31 +92,35 @@ def testx_sent_tokenize(self):
         # Use default engine (crfcut)
         self.assertEqual(sent_tokenize(None), [])
         self.assertEqual(sent_tokenize(""), [])
-
         self.assertEqual(
-            sent_tokenize(SENT_1, engine="crfcut"),
+            sent_tokenize(SENT_1),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2, engine="crfcut"),
+            sent_tokenize(SENT_2),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3, engine="crfcut"),
+            sent_tokenize(SENT_3),
             SENT_3_TOKS,
         )
+
         self.assertEqual(
-            sent_tokenize(SENT_1),
+            sent_tokenize(SENT_1, engine="crfcut"),
             SENT_1_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_2),
+            sent_tokenize(SENT_2, engine="crfcut"),
             SENT_2_TOKS,
         )
         self.assertEqual(
-            sent_tokenize(SENT_3),
+            sent_tokenize(SENT_3, engine="crfcut"),
             SENT_3_TOKS,
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="crfcut"),
+            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
+        )
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -135,6 +139,7 @@ def testx_sent_tokenize(self):
                 engine="tltk",
             ),
         )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_1,
@@ -153,6 +158,11 @@ def testx_sent_tokenize(self):
                 engine="thaisum",
             ),
         )
+        self.assertEqual(
+            sent_tokenize(SENT_4, engine="thaisum"),
+            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
+        )
+
         self.assertIsNotNone(
             sent_tokenize(
                 SENT_3,
@@ -177,14 +187,6 @@ def testx_sent_tokenize(self):
         #         engine="wtp-large",
         #     ),
         # )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="crfcut"),
-            [["ผม", "กิน", "ข้าว", " ", "\n", "เธอ", "เล่น", "เกม"]],
-        )
-        self.assertEqual(
-            sent_tokenize(SENT_4, engine="thaisum"),
-            [["ผม", "กิน", "ข้าว", " ", "เธอ", "เล่น", "เกม"]],
-        )
 
     def testx_word_tokenize(self):
         self.assertIsNotNone(word_tokenize(TEXT_1, engine="nlpo3"))
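
For context, a minimal sketch of the behavior these hunks pin down, assuming sent_tokenize comes from pythainlp.tokenize and that SENT_4 is the string reconstructed below from the expected token lists (both are assumptions; the test constants are defined elsewhere in the test module):

    # Sketch only: SENT_4's real value lives elsewhere in the test file;
    # this reconstruction is inferred from the expected tokens in the diff.
    from pythainlp.tokenize import sent_tokenize

    SENT_4 = "ผมกินข้าว \nเธอเล่นเกม"  # assumed value

    # The test's comment says the default engine is crfcut, so the
    # default call and the explicit crfcut call should agree -- which is
    # what the reordered SENT_1..SENT_3 asserts check.
    assert sent_tokenize(SENT_4) == sent_tokenize(SENT_4, engine="crfcut")

    # Per the expected values in the diff: crfcut keeps the "\n" token,
    # while thaisum drops it.
    print(sent_tokenize(SENT_4, engine="crfcut"))
    print(sent_tokenize(SENT_4, engine="thaisum"))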