
Commit 4bcc841

Added twitter scraper
1 parent 5272940

File tree

1 file changed: +75 additions, -0 deletions

twitter-scraper/twitter_scraper.py

@@ -0,0 +1,75 @@
# This script searches for tweets matching a keyword and writes selected
# fields for each tweet to a CSV file.

import csv
import sys

import tweepy

# Replace API_KEY and API_SECRET with your application's key and secret.
# AppAuthHandler (app-only auth) is used instead of OAuthHandler because it
# grants higher Search API rate limits: 450 requests per 15-minute window
# versus 180, i.e. 2.5 times as many.
auth = tweepy.AppAuthHandler('API_KEY', 'API_SECRET')
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)

if not api:
    print("Can't Authenticate")
    sys.exit(-1)


def clean(val):
    # Return an empty string for falsy values so csv.writer never sees None.
    return val if val else ""


searchQuery = ''      # Your keyword(s) or hashtag(s), separated by commas
maxTweets = 80000     # Upper bound on the number of tweets to download
tweetsPerQry = 100    # 100 is the maximum the Search API permits per request
fName = 'myfile.csv'  # The CSV file where the tweets will be stored

# If results from a specific ID onwards are required, set since_id to that ID;
# otherwise default to no lower limit and go as far back as the API allows.
sinceId = None

# If only results below a specific ID are required, set max_id to that ID;
# otherwise default to no upper limit and start from the most recent tweet
# matching the search query.
max_id = -1
tweetCount = 0

# print("Downloading max {0} tweets".format(maxTweets))
with open(fName, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    while tweetCount < maxTweets:
        try:
            if max_id <= 0:
                if not sinceId:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry)
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            since_id=sinceId)
            else:
                if not sinceId:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            max_id=str(max_id - 1))
                else:
                    new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)

            if not new_tweets:
                print("No more tweets found")
                break

            for tweet in new_tweets:
                csvwriter.writerow([tweet.created_at,
                                    clean(tweet.user.screen_name),
                                    clean(tweet.text),
                                    tweet.user.created_at,
                                    tweet.user.followers_count,
                                    tweet.user.friends_count,
                                    tweet.user.statuses_count,
                                    clean(tweet.user.location),
                                    tweet.user.geo_enabled,
                                    tweet.user.lang,
                                    clean(tweet.user.time_zone),
                                    tweet.retweet_count])

            tweetCount += len(new_tweets)
            # print("Downloaded {0} tweets".format(tweetCount))

            # Page backwards: the next request asks only for tweets older
            # than the oldest one seen so far.
            max_id = new_tweets[-1].id
        except Exception as e:
            # Exit the loop on any error instead of retrying forever
            print("some error : " + str(e))
            break

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))
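The script writes data rows with no header line. A minimal sketch of a header row matching the twelve fields written for each tweet is shown below; it would go right after csv.writer(csvfile) is created, and the column names are illustrative assumptions, not part of the original script:

# Hypothetical header row; the names mirror the per-tweet fields
# written inside the download loop.
csvwriter.writerow(["created_at", "screen_name", "text", "user_created_at",
                    "followers_count", "friends_count", "statuses_count",
                    "location", "geo_enabled", "lang", "time_zone",
                    "retweet_count"])

Writing the header once before the loop keeps the CSV self-describing when it is later opened in a spreadsheet or loaded into a dataframe.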
