
Commit 0e5a144

Add files via upload
1 parent 1de03e0 commit 0e5a144

File tree

1 file changed: +146 −0 lines changed


APIScripts/Twitter API/twitter_api.py

# -*- coding: utf-8 -*-
"""Twitter API.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mpPiTJ6ed6vWuo37u7s3OI1eZSWe2m3V
"""
# Import necessary libraries

from google.colab import drive  # to mount Google Drive in the Colab notebook
import tweepy                   # Python wrapper around the Twitter API
import pandas as pd
import csv
import time
# Mounting Google Drive

drive.mount('/content/gdrive')
path = './gdrive/My Drive/'
# Load Twitter API secrets from an external file

secrets = pd.read_csv('/content/gdrive/MyDrive/secrets.csv')

consumer_key = secrets['consumer_key'][0]
consumer_secret = secrets['consumer_secret'][0]
access_token = secrets['access_token'][0]
access_token_secret = secrets['access_token_secret'][0]
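
# The script assumes secrets.csv is a one-row CSV whose header matches the
# column names used above; a minimal sketch of that file (values are
# placeholders, not real credentials):
#
#   consumer_key,consumer_secret,access_token,access_token_secret
#   XXXXXXXX,XXXXXXXX,XXXXXXXX,XXXXXXXX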

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
auth.secure = True
# wait_on_rate_limit_notify and the exception classes caught below exist in
# tweepy 3.x; tweepy 4.x renamed or removed them
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
# Helper function to handle the Twitter API rate limit

def limit_handled(cursor, list_name):
    while True:
        try:
            yield cursor.next()
        except StopIteration:
            # The cursor is exhausted; end the generator cleanly (letting
            # StopIteration escape a generator raises RuntimeError on Python 3.7+)
            return
        except tweepy.RateLimitError:
            print("\nCurrent number of data points in list = " + str(len(list_name)))
            print('Hit Twitter API rate limit.')
            for i in range(3, 0, -1):
                print("Wait for {} mins.".format(i * 5))
                time.sleep(5 * 60)
        except tweepy.error.TweepError:
            print('\nCaught TweepError exception')
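
# limit_handled is not called elsewhere in this script; a minimal usage
# sketch, assuming a tweepy 3.x Cursor ('example_user' is a placeholder
# handle):
#
#   followers = []
#   cursor = tweepy.Cursor(api.followers, screen_name='example_user').items()
#   for follower in limit_handled(cursor, followers):
#       followers.append(follower)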

# Helper function to get all tweets for a specified user
# (the standard API only exposes a user's ~3240 most recent tweets)

def user_tweets(screen_name, number_of_tweets):

    # A list to hold all tweets fetched by tweepy
    alltweets = []

    # Extract the initial 200 (most recent) tweets
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # Add these to the list
    alltweets.extend(new_tweets)

    # Save the id of the oldest tweet, less one
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until we reach the desired limit
    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        # All subsequent requests use the max_id parameter to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # Stop once the timeline is exhausted, otherwise this loop never ends
        if not new_tweets:
            break

        # Add these to the list
        alltweets.extend(new_tweets)

        # Update the id of the oldest tweet, less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    # Store them as a 2D list, later used to write the csv file
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the csv
    with open(path + '%s_tweets.csv' % screen_name, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
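
# Note: user_tweets assumes the account has at least one tweet; for an empty
# timeline, alltweets[-1] would raise an IndexError. A minimal usage sketch
# ('example_user' is a placeholder handle):
#
#   user_tweets('example_user', 500)
#   df = pd.read_csv(path + 'example_user_tweets.csv')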

# Helper function to get all tweets containing a specific keyword

def keyword_tweets(search_query, number_of_tweets):

    # A list to hold all tweets fetched by tweepy
    alltweets = []

    # Extract the initial batch (the standard search endpoint caps count at
    # 100 per request, so count=200 is silently reduced by the API)
    new_tweets = api.search(q=search_query, count=200)

    alltweets.extend(new_tweets)

    # Save the id of the oldest tweet, less one
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until we reach the desired limit
    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        # All subsequent requests use the max_id parameter to prevent duplicates
        new_tweets = api.search(q=search_query, count=200, max_id=oldest)

        # Stop once the results are exhausted, otherwise this loop never ends
        if not new_tweets:
            break

        alltweets.extend(new_tweets)

        # Update the id of the oldest tweet, less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    # Store them as a 2D list, later used to write the csv file
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the csv
    with open(path + '%s_tweets.csv' % search_query, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
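
# A minimal usage sketch ('python' is a placeholder query; the standard
# search index only covers roughly the last 7 days of tweets):
#
#   keyword_tweets('python', 300)
#   df = pd.read_csv(path + 'python_tweets.csv')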

# Main driver code

if __name__ == '__main__':
    choice = int(input('''Do you wish to search by:
1. Twitter id
2. Keyword\n'''))
    if choice == 1:
        user_id = input("Please provide the twitter id: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        user_tweets(user_id, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % user_id)
    else:
        keyword = input("Please provide the keyword you wish to search by: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        keyword_tweets(keyword, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % keyword)

    # Display the extracted tweets (a bare `tweets` only renders in a notebook)
    print(tweets)
