PyThaiNLP
diff --git a/examples/soundex.py b/examples/soundex.py
Lines changed: 12 additions & 3 deletions
diff --git a/examples/spell.py b/examples/spell.py
Lines changed: 20 additions & 13 deletions
diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py
Lines changed: 2 additions & 3 deletions
diff --git a/pythainlp/corpus/alphabet.py b/pythainlp/corpus/alphabet.py
Lines changed: 6 additions & 2 deletions
diff --git a/pythainlp/corpus/conceptnet.py b/pythainlp/corpus/conceptnet.py
Lines changed: 1 addition & 2 deletions
diff --git a/pythainlp/corpus/country.py b/pythainlp/corpus/country.py
Lines changed: 4 additions & 1 deletion
diff --git a/pythainlp/corpus/make-stopword.tool b/pythainlp/corpus/make-stopword.tool
Lines changed: 0 additions & 58 deletions
diff --git a/pythainlp/corpus/provinces.csv b/pythainlp/corpus/provinces.csv
Lines changed: 1 addition & 1 deletion
diff --git a/pythainlp/corpus/provinces.py b/pythainlp/corpus/provinces.py
Lines changed: 1 addition & 1 deletion
diff --git a/pythainlp/corpus/wordnet.py b/pythainlp/corpus/wordnet.py
Lines changed: 5 additions & 2 deletions
@@ -1,7 +1,16 @@
 # -*- coding: utf-8 -*-
 
-from pythainlp.soundex import LK82, Udom83
+from pythainlp.soundex import lk82, metasound, udom83
 
-print(LK82("รถ") == LK82("รด"))
+texts = ["บูรณะ", "บูรณการ", "มัก", "มัค", "มรรค", "ลัก", "รัก", "รักษ์", ""]
+for text in texts:
+    print(
+        "{} - lk82: {} - udom83: {} - metasound: {}".format(
+            text, lk82(text), udom83(text), metasound(text)
+        )
+    )
 
-print(Udom83("วรร") == Udom83("วัน"))
+# check equivalence
+print(lk82("รถ") == lk82("รด"))
+print(udom83("วรร") == udom83("วัน"))
+print(metasound("นพ") == metasound("นภ"))
@@ -1,21 +1,28 @@
 # -*- coding: utf-8 -*-
 
+from pythainlp.corpus import ttc
 from pythainlp.spell import spell
-from pythainlp.spell.pn import spell as pn_tnc_spell
-from pythainlp.spell.pn import correct as pn_tnc_correct
 from pythainlp.spell.pn import NorvigSpellChecker
-from pythainlp.corpus import ttc
+from pythainlp.spell.pn import correct as pn_tnc_correct
+from pythainlp.spell.pn import spell as pn_tnc_spell
 
-# checker from pythainlp.spell module (generic)
-spell("สี่เหลียม")  # ['สี่เหลี่ยม']
-# spell("สี่เหลียม", engine="hunspell")  # available in some Linux systems
+# spell checker from pythainlp.spell module (generic)
+print(spell("สี่เหลียม"))  # ['สี่เหลี่ยม']
+# print(spell("สี่เหลียม", engine="hunspell"))  # available in some Linux systems
 
-# checker from pythainlp.spell.pn module (specified algorithm - Peter Norvig's)
-pn_tnc_spell("เหลืยม")
-pn_tnc_correct("เหลืยม")
+# spell checker from pythainlp.spell.pn module (specified algorithm - Peter Norvig's)
+print(pn_tnc_spell("เหลืยม"))
+print(pn_tnc_correct("เหลืยม"))
 
-# checker from pythainlp.spell.pn module (specified algorithm, custom dictionary)
+
+# spell checker from pythainlp.spell.pn module (specified algorithm, custom dictionary)
 ttc_word_freqs = ttc.get_word_frequency_all()
-pn_ttc_spell_checker = NorvigSpellChecker(custom_dict=ttc_word_freqs)
-pn_ttc_spell_checker.spell("เหลืยม")
-pn_ttc_spell_checker.correct("เหลืยม")
+pn_ttc_checker = NorvigSpellChecker(custom_dict=ttc_word_freqs)
+print(pn_ttc_checker.spell("เหลืยม"))
+print(pn_ttc_checker.correct("เหลืยม"))
+
+# apply different dictionary filter when creating spell checker
+pn_tnc_checker = NorvigSpellChecker()
+print(len(pn_tnc_checker.dictionary()))
+pn_tnc_checker_no_filter = NorvigSpellChecker(dict_filter=None)
+print(len(pn_tnc_checker_no_filter.dictionary()))
@@ -3,14 +3,13 @@
 from pythainlp.collation import collation
 from pythainlp.date import now
 from pythainlp.keywords import find_keyword
-from pythainlp.metasound import metasound
 from pythainlp.rank import rank
 from pythainlp.romanization import romanize
 from pythainlp.sentiment import sentiment
-from pythainlp.soundex import LK82, Udom83
+from pythainlp.soundex import lk82, metasound, udom83
 from pythainlp.spell import spell
 from pythainlp.tag import pos_tag
-from pythainlp.Text import Text
+from pythainlp.text import Text
 from pythainlp.tokenize import etcc, sent_tokenize, tcc, word_tokenize
 from pythainlp.util import bigrams, ngrams, trigram
 
 
@@ -1,9 +1,13 @@
 # -*- coding: utf-8 -*-
+"""
+Thai alphabets
+"""
 
 
 def get_data():
-    """เป็นคำสั่งสำหรับดึงตัวอักษร ก - ฮ ในภาษาไทย
-    คืนค่า list
+    """
+    Get a list of Thai consonants (from Ko Kai \u0e01 to Ho Nokhuk \u0e2e)
+    คืนค่า list ที่มีพยัญชนะไทย ก (\u0e01) - ฮ (\u0e2e)
     """
     return [
         "ก",
 
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
-
 """
-นี่คือ API สำหรับดึงข้อมูลมาจาก http://conceptnet.io
+ดึงข้อมูลจาก http://conceptnet.io
 """
 import requests
 
 
@@ -1,9 +1,12 @@
 # -*- coding: utf-8 -*-
+"""
+Country list
+"""
 
 
 def get_data():
     """
-    Return list of countries, in Thai language
+    Return a list of countries, in Thai language
     """
     return [
         "อัฟกานิสถาน",
 
@@ -74,4 +74,4 @@
 อุดรธานี,อด,UDN
 อุตรดิตถ์,อต,UTT
 อุทัยธานี,อน,UTI
-อุบลราชธานี,อบ,UBN
+อุบลราชธานี,อบ,UBN
@@ -6,7 +6,7 @@
 
 def get_data():
     """
-    Return list of provinces in Thailand, in Thai language
+    Return a list of provinces in Thailand, in Thai language
     """
     return [
         "กระบี่",
 
@@ -1,15 +1,18 @@
 # -*- coding: utf-8 -*-
 """
-API ตัวใหม่ เริ่มใช้ตั้งแต่ PyThaiNLP 1.4 เป็นต้นไป
+WordNet
 """
 import nltk
 
 try:
     nltk.data.find("corpora/omw")
+except LookupError:
+    nltk.download("omw")
+
+try:
     nltk.data.find("corpora/wordnet")
 except LookupError:
     nltk.download("wordnet")
-    nltk.download("omw")
 
 from nltk.corpus import wordnet