diff --git a/NLTK Text Classification/main.py b/NLTK Text Classification/main.py
index 58fa624..db8c76c 100644
--- a/NLTK Text Classification/main.py
+++ b/NLTK Text Classification/main.py
@@ -29,4 +29,15 @@ def language_features(name):
 print(classifier.classify(language_features('Incredibile'))) # WRONG
 print(classifier.prob_classify(language_features('Incredibile')).prob("english")) # WRONG
 
-classifier.show_most_informative_features(10)
\ No newline at end of file
+classifier.show_most_informative_features(10)
+
+# Label every distinct whitespace-separated token read from data/wiki_eng.txt
+# and count how many the classifier assigns to each language.
+with open('data/wiki_eng.txt', 'r', encoding='utf-8') as wiki_file:
+    # The context manager closes the handle; set() drops duplicate tokens.
+    wiki_eng = list(set(wiki_file.read().split()))
+# Each entry is a [token, predicted_label] pair.
+wiki_lang = [[w, classifier.classify(language_features(w))] for w in wiki_eng]
+words_ita = sum(1 for w in wiki_lang if w[1] == 'italian')
+words_eng = sum(1 for w in wiki_lang if w[1] == 'english')
+print('Italian: {} | English: {}'.format(words_ita, words_eng))