Skip to content

Commit 5896365

Browse files
committed
pattern.ru spelling, lexicon
1 parent a749212 commit 5896365

File tree

7 files changed

+134384
-3
lines changed

7 files changed

+134384
-3
lines changed

examples/03-en/05-tagset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
34 34
print(parse_fr("les chats noirs", chunks=False)) # les/DT chats/NNS noirs/JJ
35 35
print(parse_it("i gatti neri", chunks=False)) # i/DT gatti/NNS neri/JJ
36 36
print(parse_nl("de zwarte katten", chunks=False)) # de/DT zwarte/JJ katten/NNS
37-
print(parse_ru("Да здравствует Сталин!", chunks=False))
37+
print(parse_ru("какой сегодня хороший день!", chunks=False)) # какой/DT сегодня/RB хороший/JJ день/NN !/.
38 38
print("")
39 39

40 40
# In some cases, this means the original tagset is mapped to Penn Treebank:

pattern/text/en/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def load(self, path=None):
153 153
)
154 154

155 155
spelling = Spelling(
156-
path = os.path.join(MODULE, "en-spelling.txt")
156+
path = os.path.join(MODULE, "en-spelling.txt")
157 157
)
158 158

159 159

pattern/text/ru/__init__.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@
42 42
SLASH, WORD, POS, CHUNK, PNP, REL, ANCHOR, LEMMA, AND, OR
43 43
)
44 44

45+
# Import spelling base class.
46+
from pattern.text import (
47+
Spelling
48+
)
49+
45 50
sys.path.pop(0)
46 51

47 52
#--- Russian PARSER --------------------------------------------------------------------------------
@@ -68,6 +73,11 @@ def find_tags(self, tokens, **kwargs):
68 73
)
69 74

70 75

76+
spelling = Spelling(
77+
path=os.path.join(MODULE, "ru-spelling.txt"),
78+
alphabet='CYRILLIC'
79+
)
80+
71 81

72 82
def tokenize(s, *args, **kwargs):
73 83
""" Returns a list of sentences, where punctuation marks have been split from words.
@@ -84,4 +94,10 @@ def parse(s, *args, **kwargs):
84 94
def parsetree(s, *args, **kwargs):
85 95
""" Returns a parsed Text from the given string.
86 96
"""
87-
return Text(parse(s, *args, **kwargs))
97+
return Text(parse(s, *args, **kwargs))
98+
99+
100+
def suggest(w):
101+
""" Returns a list of (word, confidence)-tuples of spelling corrections.
102+
"""
103+
return spelling.suggest(w)
File renamed without changes.

0 commit comments

Comments
 (0)