Commit
added some debugging advice
Lam Thuy Vo authored and Lam Thuy Vo committed Mar 14, 2017
1 parent 6a1eff8 commit 7478190
Showing 4 changed files with 93 additions and 8 deletions.
22 changes: 17 additions & 5 deletions README.md
@@ -15,8 +15,6 @@ This is a growing list of scripts we've put together to make social data mining

### Setup



#### Before you begin

1. If you don’t already have Python installed, start by getting [Python up and running](http://docs.python-guide.org/en/latest/starting/installation/). Also have `git` installed.
@@ -39,6 +37,20 @@ cd social-data-scripts
```
pip install -r requirements.txt
```
or
```
sudo pip install -r requirements.txt
```
If you have problems installing the dependencies through the requirements file, you can also install them one by one:
```
pip install requests
pip install tweepy --ignore-installed six
```
or
```
sudo pip install requests
sudo pip install tweepy --ignore-installed six
```
3. Make a `secrets.py` file modeled after the `secrets.py.example` file by going into the `scripts` directory and running these bash commands:
```
cd scripts
@@ -51,9 +63,9 @@ Now you have a `secrets.py` file! 🤗 Open it up in a text editor of your choice
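
If it helps to see the shape of that file, here is a minimal sketch of what a filled-in `secrets.py` might look like (the variable names match what the scripts import; the values are placeholders you would swap for your own Twitter app credentials):
```
# secrets.py (sketch) -- placeholder values, replace with your own Twitter app credentials
TWITTER_C_KEY = "your-consumer-key"
TWITTER_C_SECRET = "your-consumer-secret"
TWITTER_A_KEY = "your-access-token"
TWITTER_A_SECRET = "your-access-token-secret"
```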

#### Scripts

-* [tweet_dumper.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/tweet_dumper.py): Up to 3200 tweets from an individual account (includes tweet id, time stamp, location, text, retweet count, favorite count (though the favorite count is inaccurate for retweets), whether something was a manual retweet, how it was tweeted (Tweetdeck, Android, etc.)). This script was modified from [@Yanofsky](https://gist.github.com/yanofsky/5436496)'s original script.
-* [twitter_bio_info_compiler.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/twitter_bio_info_compiler.py): Twitter user bios (name, display name, bio, followers count (at time of scraping), following count (at time of scraping), when the account was created, location given in the bio)
-* [twitter_searcher.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/twitter_searcher.py): You can search Twitter via its search API going back 7 days and grab tweets (id, author name, timestamp when it was created, favorites (again, unreliable), retweets, text)
+* [twitter_tweet_dumper.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/scripts/twitter_tweet_dumper.py): Up to 3200 tweets from an individual account (includes tweet id, time stamp, location, text, retweet count, favorite count (though the favorite count is inaccurate for retweets), whether something was a manual retweet, how it was tweeted (Tweetdeck, Android, etc.)). This script was modified from [@Yanofsky](https://gist.github.com/yanofsky/5436496)'s original script.
+* [twitter_bio_info_compiler.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/scripts/twitter_bio_info_compiler.py): Twitter user bios (name, display name, bio, followers count (at time of scraping), following count (at time of scraping), when the account was created, location given in the bio)
+* [twitter_searcher.py](https://github.com/buzzfeed-openlab/big-picture/blob/master/scripts/scripts/twitter_searcher.py): You can search Twitter via its search API going back 7 days and grab tweets (id, author name, timestamp when it was created, favorites (again, unreliable), retweets, text)
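
Each of these scripts writes its results to a CSV file. One quick way to peek at the output once a script has run is a few lines of Python (a minimal sketch; the filename below is only an example, use whatever file the script actually produced):
```
import csv

# the filename here is only an example; each script names its output file differently
with open("example-result.csv") as f:
    for row in csv.reader(f):
        print(row)
```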

### Using Facebook's API

4 changes: 2 additions & 2 deletions scripts/twitter_searcher.py
@@ -28,7 +28,7 @@ def limit_handled(cursor):

# search terms
# find a full list of conventions here: https://dev.twitter.com/rest/public/search#query-operators
searchterm = "\"Queen Bey\""
searchterm = "\"nothing fucked me up\""

# Open/Create a file to append data
csvFile = open_csv_w('%s-result.csv' % searchterm)
@@ -49,7 +49,7 @@ def limit_handled(cursor):
# point of time you want the search to start
since="2017-01-10",
# point of time you want the search to end
until="2017-02-28",
until="2017-03-10",
lang="en").items()):
#Write a row to the csv file/ I use encode utf-8
csvWriter.writerow([tweet.id_str,
2 changes: 1 addition & 1 deletion scripts/twitter_tweet_dumper.py
@@ -73,4 +73,4 @@ def get_all_tweets(screen_name):

if __name__ == '__main__':
#pass in the username of the account you want to download
get_all_tweets("potus")
get_all_tweets("sidbow56")
73 changes: 73 additions & 0 deletions scripts/twitter_tweet_replies.py
@@ -0,0 +1,73 @@
#!/usr/bin/env python
# encoding: utf-8

import tweepy #https://github.com/tweepy/tweepy
import csv
import time
from utils import open_csv_w

# import authentication credentials
from secrets import TWITTER_C_KEY, TWITTER_C_SECRET, TWITTER_A_KEY, TWITTER_A_SECRET

#authorize twitter, initialize tweepy
auth = tweepy.OAuthHandler(TWITTER_C_KEY, TWITTER_C_SECRET)
auth.set_access_token(TWITTER_A_KEY, TWITTER_A_SECRET)
api = tweepy.API(auth)

# Twitter API limit handler; this helps you deal with the fact that Twitter only allows you to ping its API a set number of times
def limit_handled(cursor):
    while True:
        try:
            yield cursor.next()
        except tweepy.error.TweepError:
            print("waiting 15 minutes for Twitter to let me get more tweets ᕕ( ᐛ )ᕗ")
            time.sleep(15 * 60)

# counter for console messages
counter = 0

# search terms
# find a full list of conventions here: https://dev.twitter.com/rest/public/search#query-operators
username = "@nhannahjones"
tweet_id = "839474548319916032"

# Open/Create a file to append data
csvFile = open_csv_w('%s-result.csv' % username)
#Use csv Writer
csvWriter = csv.writer(csvFile)
# these are the headers of your csv
csvWriter.writerow(["id",
"authorname",
"created_at",
"favorites",
"retweets",
"text",
"in_reply_to_status_id"])

# loop to put tweets into the csv
for tweet in limit_handled(tweepy.Cursor(api.search,
                                         q=username,
                                         # note that Twitter only makes available a sample of tweets from the last 7 days: https://dev.twitter.com/rest/public/search
                                         # point of time you want the search to start
                                         since="2017-01-07",
                                         # point of time you want the search to end
                                         until="2017-03-10",
                                         lang="en").items()):
    #Write a row to the csv file/ I use encode utf-8
    # if tweet.in_reply_to_status_id == tweet_id:
    #     print(tweet.in_reply_to_status_id)
    #     print(tweet)
    csvWriter.writerow([tweet.id_str,
                        tweet.author.screen_name,
                        tweet.created_at,
                        tweet.favorite_count,
                        tweet.retweet_count,
                        tweet.text.encode("utf-8"),
                        tweet.in_reply_to_status_id])
    # this code prints information in your console while you're getting tweets
    counter += 1
    if counter % 100 == 0:
        print("%s tweets collected" % counter)

# close the file
csvFile.close()
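
The commented-out `in_reply_to_status_id` check above hints at the follow-up step: the search grabs every recent tweet that mentions the username, and the last column of the CSV records which tweet (if any) each one was replying to. Here is a rough sketch of filtering that output down to direct replies to the tweet id set at the top of the script (the input filename follows from `username`; the filtered output filename is just a suggestion):
```
import csv

# keep only the rows that are direct replies to the tweet we care about
tweet_id = "839474548319916032"

with open("@nhannahjones-result.csv") as infile, \
     open("replies-to-%s.csv" % tweet_id, "w") as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    writer.writerow(next(reader))  # copy the header row
    for row in reader:
        # in_reply_to_status_id is the last column the script writes
        if row[-1] == tweet_id:
            writer.writerow(row)
```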
