|
20 | 20 | ) |
21 | 21 | from pythainlp.tokenize.ssg import segment as ssg_segment |
22 | 22 | from pythainlp.util import dict_trie |
| 23 | +from pythainlp.tokenize import nercut as tokenize_nercut |
23 | 24 |
|
24 | 25 |
|
25 | 26 | class TestTokenizePackage(unittest.TestCase): |
@@ -230,6 +231,7 @@ def test_word_tokenize(self): |
230 | 231 | self.assertIsNotNone(word_tokenize(self.text_1, engine="icu")) |
231 | 232 | self.assertIsNotNone(word_tokenize(self.text_1, engine="deepcut")) |
232 | 233 | self.assertIsNotNone(word_tokenize(self.text_1, engine="attacut")) |
| 234 | + self.assertIsNotNone(word_tokenize(self.text_1, engine="nercut")) |
233 | 235 | with self.assertRaises(ValueError): |
234 | 236 | word_tokenize("หมอนทอง", engine="XX") # engine does not exist |
235 | 237 |
|
@@ -364,6 +366,15 @@ def test_word_tokenize_attacut(self): |
364 | 366 | ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"], |
365 | 367 | ) |
366 | 368 |
|
| 369 | + def test_word_tokenize_nercut(self): |
| 370 | + self.assertEqual(tokenize_nercut.segment(None), []) |
| 371 | + self.assertEqual(tokenize_nercut.segment(""), []) |
| 372 | + self.assertIsNotNone( |
| 373 | + tokenize_nercut.segment("ทดสอบ") |
| 374 | + ) |
| 375 | + self.assertIsNotNone(tokenize_nercut.segment("ทดสอบ")) |
| 376 | + self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut")) |
| 377 | + |
367 | 378 | def test_sent_tokenize(self): |
368 | 379 | self.assertEqual(sent_tokenize(None), []) |
369 | 380 | self.assertEqual(sent_tokenize(""), []) |
|
0 commit comments