Skip to content

Commit

Permalink
NLTK Tutorial fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Inzaniak committed Aug 18, 2018
1 parent a0a0cf6 commit db7af5e
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion NLTK Text Classification/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,13 @@ def language_features(name):
# Spot-check the classifier on the single word 'Incredibile': first the
# predicted label, then the probability reported for the "english" label.
# The trailing "WRONG" markers are the author's own notes, presumably
# flagging that the classifier's answer for this word is incorrect.
print(classifier.classify(language_features('Incredibile'))) # WRONG
print(classifier.prob_classify(language_features('Incredibile')).prob("english")) # WRONG

# NOTE(review): the call below appears twice in this view. This looks like a
# removed/added line pair from a diff rendering rather than two real calls --
# confirm against the actual file before removing either one.
classifier.show_most_informative_features(10)
classifier.show_most_informative_features(10)

# Classify every distinct whitespace-separated token of data/wiki_eng.txt
# (presumably an English text sample, per the file name -- confirm) and report
# how many tokens the classifier labels 'italian' versus 'english'.

# Read inside a context manager so the file handle is closed deterministically
# instead of being left for the garbage collector.
with open('data/wiki_eng.txt', 'r', encoding='utf-8') as wiki_file:
    wiki_eng = wiki_file.read().split()

# Deduplicate so each distinct token is classified and counted only once.
wiki_eng = list(set(wiki_eng))

# Each entry is a [word, predicted_label] pair.
wiki_lang = [[w, classifier.classify(language_features(w))] for w in wiki_eng]

# Count per label without materialising intermediate lists.
words_ita = sum(1 for _, label in wiki_lang if label == 'italian')
words_eng = sum(1 for _, label in wiki_lang if label == 'english')
print('Italian: {} | English: {}'.format(words_ita, words_eng))

0 comments on commit db7af5e

Please sign in to comment.