# sentiment-analysis-nltk-example.py

#!/usr/bin/python3
# PYTHON EXAMPLE TO DO SENTIMENT ANALYSIS ON TWEETS
# Author: James Campbell
# Date: 2015-07-01
# Updated: 2015-11-16
# USE FOR PYTHON 3 only
import pickle
import sys
from sys import exit

import nltk

# Hand-labelled positive training examples, stored as (tweet text, label)
# pairs. The label is attached once here instead of being repeated per row.
pos_tweets = [(text, 'positive') for text in (
    'I love this car',
    'This view is amazing',
    'I feel great this morning',
    'I am so excited about the concert',
    'He is my best friend',
    'Going well',
    'Thank you',
    'Hope you are doing well',
    'I am very happy',
    'Good for you',
    'It is all good. I know about it and I accept it.',
    'This is really good!',
    'Tomorrow is going to be fun.',
    'Smiling all around.',
    'These are great apples today.',
    'How about them apples? Thomas is a happy boy.',
    'Thomas is very zen. He is well-mannered.',
)]

# Hand-labelled negative training examples, stored as (tweet text, label)
# pairs. The label is attached once here instead of being repeated per row.
neg_tweets = [(text, 'negative') for text in (
    'I do not like this car',
    'This view is horrible',
    'I feel tired this morning',
    'I am not looking forward to the concert',
    'He is my enemy',
    'I am a bad boy',
    'This is not good',
    'I am bothered by this',
    'I am not connected with this',
    'Sadistic creep you ass. Die.',
    'All sorts of crazy and scary as hell.',
    'Not his emails, no.',
    'His father is dead. Returned obviously.',
    'He has a bomb.',
    'Too fast to be on foot. We cannot catch them.',
)]

# Tokenise every labelled tweet: split on whitespace, drop tokens shorter
# than three characters, and lowercase the rest. Each entry becomes a
# (token list, sentiment label) pair.
tweets = [
    ([token.lower() for token in text.split() if len(token) >= 3], label)
    for text, label in pos_tweets + neg_tweets
]

def get_words_in_tweets(tweets):
    """Flatten the token lists of labelled tweets into one list.

    ``tweets`` is an iterable of ``(tokens, sentiment)`` pairs. The result
    holds every token of every tweet in the order encountered, duplicates
    included; the sentiment labels are ignored.
    """
    return [token for tokens, _label in tweets for token in tokens]

def get_word_features(wordlist):
    """Return the distinct words of ``wordlist``.

    The words are counted with ``nltk.FreqDist`` and the keys of that
    distribution are returned; the counts themselves are discarded.
    """
    return nltk.FreqDist(wordlist).keys()

def extract_features(document):
    """Build the boolean feature dict for one tokenised document.

    For every word in the module-level ``word_features`` vocabulary the
    result has a key ``'contains(<word>)'`` whose value is True when that
    word occurs in ``document`` and False otherwise.
    """
    present = set(document)  # set membership keeps each lookup cheap
    return {
        'contains(%s)' % candidate: candidate in present
        for candidate in word_features
    }

# Vocabulary: the distinct tokens across all training tweets. It is read as a
# module-level global by extract_features.
all_training_words = get_words_in_tweets(tweets)
word_features = get_word_features(all_training_words)

# Pair each tweet's feature dict with its label, then fit the Naive Bayes
# classifier on the result.
training_set = nltk.classify.apply_features(extract_features, tweets)
classifier = nltk.NaiveBayesClassifier.train(training_set)

# Optional: persist the trained classifier so it can be loaded elsewhere
# without rebuilding the training set every time.
# The context manager closes the file even if pickling raises.
with open("tweetposneg.pickle", "wb") as save_classifier:
    pickle.dump(classifier, save_classifier)

# optional: load a classifier that was saved previously (only unpickle files you trust)
# classifier_f = open("tweetposneg.pickle", "rb")
# classifier = pickle.load(classifier_f)
# classifier_f.close()

runtweets = []  # tweets to classify; optionally filled from a text file
if len(sys.argv) > 1:  # optional argument: path to a text file with one tweet per line
    tweetfile = sys.argv[1]
    with open(tweetfile, "r") as ins:
        for line in ins:
            line = line.strip()
            if line:  # a blank line is not a tweet; skip it
                runtweets.append(line)
runtweets.append('I am a bad boy')  # built-in test tweet so there is always one input
poscount = 0
negcount = 0
for tweett in runtweets:
    # Tokenise the same way the training tweets were tokenised (lowercase,
    # tokens of three or more characters); otherwise capitalised words can
    # never match a 'contains(...)' feature.
    tokens = [e.lower() for e in tweett.split() if len(e) >= 3]
    valued = classifier.classify(extract_features(tokens))
    print(valued)
    if valued == 'negative':
        negcount = negcount + 1
    else:
        poscount = poscount + 1
# Report the totals once, after every tweet has been classified. Previously
# the script exited inside the loop after the first tweet and only printed
# the counts when that tweet was positive.
print('Positive count: %s \nNegative count: %s' % (poscount, negcount))
exit()