Skip to content

Commit d185eb0

Browse files
committed
Removing is_portuguese verification
1 parent f231a49 commit d185eb0

File tree

2 files changed

+6
-20
lines changed

2 files changed

+6
-20
lines changed

parserClasses/myparser.py

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,9 @@ class TextParser():
1414
@classmethod
1515
def get_distribuition_of(cls, text:str) -> dict:
1616

17-
tokens = [word for word in TextParser.pre_proccess(text) if word not in TextParser.COMPLETE_FILTER]
18-
19-
for token_idx in range(len(tokens)):
20-
tokens[token_idx] = TextParser.PORTUGUESE_STEMMER.stem(tokens[token_idx])
21-
17+
tokens = [TextParser.PORTUGUESE_STEMMER.stem(word) for word in TextParser.pre_proccess(text)]
2218
token_frequency = FreqDist(tokens)
2319
tokens = None
24-
2520
return dict(token_frequency.items())
2621

2722
@classmethod
@@ -32,7 +27,11 @@ def pre_proccess(cls, text:str) -> list:
3227

3328
text = TextParser.split_on_upper(text)
3429
text = text.rstrip('\n').strip()
35-
return [word.lower() for word in word_tokenize(text) if len(word) <= TextParser.MAX_TAM_WORD]
30+
for word in word_tokenize(text):
31+
if len(word) <= TextParser.MAX_TAM_WORD:
32+
word_lower = word.lower()
33+
if word_lower not in TextParser.COMPLETE_FILTER:
34+
yield word_lower
3635

3736
@classmethod
3837
def stem(cls, text_list:list) -> list:
@@ -49,11 +48,6 @@ def split_on_upper(cls, text:str) -> str:
4948

5049
return text.replace(" ", " ").lstrip()
5150

52-
@classmethod
53-
def is_portuguese(cls, text:str) -> bool:
54-
#return TextParser.language_of(text[:50]) in TextParser.ACCEPTED_LANGUAGES_DETECTED
55-
return True
56-
5751
@classmethod
5852
def language_of(cls, text:str) -> str:
5953
return TextParser.TEXT_CLASSIFIER.guess_language(text)

parserClasses/test_parser.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -92,13 +92,5 @@ def test_split_on_upper_empty_str(self):
9292
text = ""
9393
self.assertEqual(TextParser.split_on_upper(text), text)
9494

95-
def test_is_portuguese(self):
96-
text = "A sorte favorece os corajosos"
97-
self.assertTrue(TextParser.is_portuguese(text))
98-
99-
def test_is_not_portuguese(self):
100-
text = "Tis but a scratch"
101-
self.assertFalse(TextParser.is_portuguese(text))
102-
10395
if __name__ == '__main__':
10496
main()

0 commit comments

Comments
 (0)