APIScripts/Twitter API/twitter_api.py
(added by pragyakhanna11 in commit 0e5a144, Jun 10, 2021; 146 additions, 0 deletions)
# -*- coding: utf-8 -*-
"""Twitter API.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1mpPiTJ6ed6vWuo37u7s3OI1eZSWe2m3V
"""

# Import the necessary libraries

from google.colab import drive  # to mount Google Drive in the Colab notebook
import tweepy                   # Python wrapper around the Twitter API
import pandas as pd
import csv
import time
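
# Note: this script uses the tweepy v3.x API (tweepy.RateLimitError, api.search,
# wait_on_rate_limit_notify). tweepy v4 renamed or removed these, so pin a 3.x
# release (e.g. tweepy 3.10) to run it as written.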

# Mount Google Drive

drive.mount('/content/gdrive')
path = '/content/gdrive/MyDrive/'  # base directory for reading secrets and writing csv output

# Load the Twitter API secrets from an external file
secrets = pd.read_csv(path + 'secrets.csv')

consumer_key = secrets['consumer_key'][0]
consumer_secret = secrets['consumer_secret'][0]
access_token = secrets['access_token'][0]
access_token_secret = secrets['access_token_secret'][0]
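
# Assumed layout of secrets.csv (a sketch; the column names must match the
# lookups above): one header row and one row of values, e.g.
#
#   consumer_key,consumer_secret,access_token,access_token_secret
#   XXXX,XXXX,XXXX,XXXX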

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
auth.secure = True
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
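
# Optional sanity check (a sketch, not in the original script): in tweepy v3,
# verify_credentials() returns the authenticated user or raises TweepError,
# so this fails fast on bad keys.
# me = api.verify_credentials()
# print("Authenticated as @%s" % me.screen_name)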

# Helper function to handle the Twitter API rate limit

def limit_handled(cursor, list_name):
    while True:
        try:
            yield cursor.next()
        except tweepy.RateLimitError:
            print("\nCurrent number of data points in list = " + str(len(list_name)))
            print('Hit Twitter API rate limit.')
            # Wait out the 15-minute rate-limit window, counting down in 5-minute steps
            for i in range(3, 0, -1):
                print("Wait for {} mins.".format(i * 5))
                time.sleep(5 * 60)
        except tweepy.error.TweepError:
            print('\nCaught TweepError exception')
            return  # stop the generator instead of retrying the same error forever
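
# Example usage (a sketch; 'TwitterDev' is an arbitrary handle): wrap a tweepy
# Cursor in limit_handled so rate-limit pauses happen inside the generator.
# followers = []
# for follower in limit_handled(tweepy.Cursor(api.followers, screen_name='TwitterDev').items(), followers):
#     followers.append(follower)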

# Helper function to get all tweets for a specified user

def user_tweets(screen_name, number_of_tweets):

    # A list to hold all tweets fetched by tweepy
    alltweets = []

    # Extract the initial 200 (most recent) tweets
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # Add these to the list
    alltweets.extend(new_tweets)

    # Save the id of the oldest tweet, less one
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until we reach the desired limit
    # (the API only exposes roughly a user's 3240 most recent tweets this way)
    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        # All subsequent requests use the max_id parameter to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # Stop once the timeline is exhausted, otherwise this loop never ends
        if not new_tweets:
            break

        # Add these to the list
        alltweets.extend(new_tweets)

        # Update the id of the oldest tweet, less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    # Store the fields as a 2D list, later used to write the csv file
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the csv
    with open(path + '%s_tweets.csv' % screen_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
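
# Example usage (a sketch; the handle is arbitrary): fetch up to 500 recent
# tweets from @TwitterDev and write them to <path>TwitterDev_tweets.csv.
# user_tweets('TwitterDev', 500)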

# Helper function to get all tweets containing a specific keyword

def keyword_tweets(search_query, number_of_tweets):

    alltweets = []

    # The standard search endpoint returns at most 100 tweets per request
    new_tweets = api.search(q=search_query, count=100)

    alltweets.extend(new_tweets)

    oldest = alltweets[-1].id - 1

    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        new_tweets = api.search(q=search_query, count=100, max_id=oldest)

        # Stop once the search results are exhausted
        if not new_tweets:
            break

        alltweets.extend(new_tweets)

        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the csv
    with open(path + '%s_tweets.csv' % search_query, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
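
# Example usage (a sketch; the query is arbitrary): fetch up to 300 tweets
# mentioning 'python' and write them to <path>python_tweets.csv.
# keyword_tweets('python', 300)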

# Main driver code

if __name__ == '__main__':
    choice = int(input('''Do you wish to search by:
1. Twitter id
2. Keyword\n'''))
    if choice == 1:
        user_id = input("Please provide the Twitter id: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        user_tweets(user_id, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % user_id)
    else:
        keyword = input("Please provide the keyword you wish to search by: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        keyword_tweets(keyword, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % keyword)

# Display the resulting dataframe (a bare expression at the end of a Colab cell renders it)
tweets
