Skip to content

Commit bd44c65

Browse files
committed
Add attacut test and attacut docs
1 parent 82e71e4 commit bd44c65

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

docs/api/tokenize.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ attacut
4545
+++++++
4646
.. automodule:: pythainlp.tokenize.attacut
4747

48+
.. autoclass:: pythainlp.tokenize.attacut.AttacutTokenizer
49+
    :members:
50+
4851
tcc
4952
+++
5053
.. automodule:: pythainlp.tokenize.tcc

tests/test_tokenize.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,13 @@ def test_word_tokenize_attacut(self):
363363
word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="attacut"),
364364
["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
365365
)
366+
self.assertEqual(
367+
attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-sc"),
368+
["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
369+
)
370+
self.assertIsNotNone(
371+
attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-c")
372+
)
366373

367374
def test_sent_tokenize(self):
368375
self.assertEqual(sent_tokenize(None), [])

0 commit comments

Comments
 (0)