Remove is_portuguese verification; move COMPLETE_FILTER filtering into pre_proccess (now a generator)

Pendulun · Pendulun · commit d185eb04d134 · 2022-06-18T21:38:45.000-03:00
diff --git a/parserClasses/myparser.py b/parserClasses/myparser.py
@@ -14,14 +14,9 @@ class TextParser():
     @classmethod
     def get_distribuition_of(cls, text:str) -> dict:
         
-        tokens = [word for word in TextParser.pre_proccess(text) if word not in TextParser.COMPLETE_FILTER]
-
-        for token_idx in range(len(tokens)):
-            tokens[token_idx] = TextParser.PORTUGUESE_STEMMER.stem(tokens[token_idx])
-
+        tokens = [TextParser.PORTUGUESE_STEMMER.stem(word) for word in TextParser.pre_proccess(text)]
         token_frequency = FreqDist(tokens)
         tokens = None
-
         return dict(token_frequency.items())
     
     @classmethod
@@ -32,7 +27,11 @@ def pre_proccess(cls, text:str) -> list:
 
         text = TextParser.split_on_upper(text)
         text = text.rstrip('\n').strip()
-        return [word.lower() for word in word_tokenize(text) if len(word) <= TextParser.MAX_TAM_WORD]
+        for word in word_tokenize(text):
+            if len(word) <= TextParser.MAX_TAM_WORD:
+                word_lower = word.lower()
+                if word_lower not in TextParser.COMPLETE_FILTER:
+                    yield word_lower
 
     @classmethod
     def stem(cls, text_list:list) -> list:
@@ -49,11 +48,6 @@ def split_on_upper(cls, text:str) -> str:
         
         return text.replace("  ", " ").lstrip()
     
-    @classmethod
-    def is_portuguese(cls, text:str) -> bool:
-        #return TextParser.language_of(text[:50]) in TextParser.ACCEPTED_LANGUAGES_DETECTED
-        return True
-    
     @classmethod
     def language_of(cls, text:str) -> str:
         return TextParser.TEXT_CLASSIFIER.guess_language(text)
diff --git a/parserClasses/test_parser.py b/parserClasses/test_parser.py
@@ -92,13 +92,5 @@ def test_split_on_upper_empty_str(self):
         text = ""
         self.assertEqual(TextParser.split_on_upper(text), text)
 
-    def test_is_portuguese(self):
-        text = "A sorte favorece os corajosos"
-        self.assertTrue(TextParser.is_portuguese(text))
-    
-    def test_is_not_portuguese(self):
-        text = "Tis but a scratch"
-        self.assertFalse(TextParser.is_portuguese(text))
-
 if __name__ == '__main__':
     main()