# -*- coding: utf-8 -*-
"""Twitter API.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mpPiTJ6ed6vWuo37u7s3OI1eZSWe2m3V
"""

# Import necessary libraries

from google.colab import drive  # to mount Google Drive in the Colab notebook
import tweepy                   # Python wrapper around the Twitter API
import pandas as pd
import csv
from datetime import date, datetime
import time

# Mount Google Drive

drive.mount('/content/gdrive')
path = '/content/gdrive/MyDrive/'  # absolute mount path, matching the secrets file below

# Load Twitter API secrets from an external file
secrets = pd.read_csv(path + 'secrets.csv')
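
# The secrets file is assumed to be a one-row CSV whose column headers match
# the lookups below, e.g. (placeholder values):
#
#   consumer_key,consumer_secret,access_token,access_token_secret
#   XXXX,XXXX,XXXX,XXXX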

consumer_key = secrets['consumer_key'][0]
consumer_secret = secrets['consumer_secret'][0]
access_token = secrets['access_token'][0]
access_token_secret = secrets['access_token_secret'][0]

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
auth.secure = True
# tweepy v3.x API object; wait_on_rate_limit_notify was removed in tweepy 4
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
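
# Optional sanity check (a minimal sketch, not in the original notebook):
# verify_credentials() raises a TweepError when the keys above are invalid,
# so authentication problems surface before any scraping starts.
try:
    api.verify_credentials()
    print('Authentication OK')
except tweepy.error.TweepError:
    print('Error during authentication')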

# Helper function to handle the Twitter API rate limit

def limit_handled(cursor, list_name):
    while True:
        try:
            yield cursor.next()
        except StopIteration:
            # The cursor is exhausted; end the generator cleanly
            return
        except tweepy.RateLimitError:
            print("\nCurrent number of data points in list = " + str(len(list_name)))
            print('Hit Twitter API rate limit.')
            # Wait out the 15-minute rate-limit window, with a countdown
            for i in range(3, 0, -1):
                print("Wait for {} mins.".format(i * 5))
                time.sleep(5 * 60)
        except tweepy.error.TweepError:
            print('\nCaught TweepError exception')
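
# Usage sketch (illustrative; this helper is not called elsewhere in the
# notebook): wrap a tweepy Cursor iterator so long pulls survive rate limits.
#
#   followers = []
#   for follower in limit_handled(
#           tweepy.Cursor(api.followers, screen_name='twitter').items(), followers):
#       followers.append(follower)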

# Helper function to get all tweets for a specified user

def user_tweets(screen_name, number_of_tweets):

    # A list to hold all tweets fetched by tweepy
    alltweets = []

    # Extract the initial (most recent) 200 tweets
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)

    # Add these to the list
    alltweets.extend(new_tweets)

    # Save the id of the oldest tweet, less one
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until we reach the desired limit
    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        # All subsequent requests use the max_id parameter to prevent duplicates
        new_tweets = api.user_timeline(screen_name=screen_name, count=200, max_id=oldest)

        # Stop if the timeline is exhausted, otherwise the loop would spin forever
        if not new_tweets:
            break

        # Add these to the list
        alltweets.extend(new_tweets)

        # Update the id of the oldest tweet, less one
        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    # Store them as a 2D array which is later used to write the CSV file
    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the CSV
    with open(path + '%s_tweets.csv' % screen_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
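
# Note: the standard user_timeline endpoint only reaches back roughly 3,200
# tweets per user, which is why the driver below caps requests below 3,240.
# Usage sketch (hypothetical handle):
#   user_tweets('nasa', 1000)   # writes nasa_tweets.csv under `path`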

# Helper function to get all tweets containing a specific keyword

def keyword_tweets(search_query, number_of_tweets):

    alltweets = []

    # The standard search endpoint caps count at 100 tweets per request
    new_tweets = api.search(q=search_query, count=100)

    alltweets.extend(new_tweets)

    oldest = alltweets[-1].id - 1

    while len(alltweets) < number_of_tweets:
        print("getting tweets before %s" % oldest)

        new_tweets = api.search(q=search_query, count=100, max_id=oldest)

        # Stop once the search results are exhausted
        if not new_tweets:
            break

        alltweets.extend(new_tweets)

        oldest = alltweets[-1].id - 1

        print("...%s tweets downloaded so far" % len(alltweets))

    outtweets = [[tweet.id_str, tweet.created_at, tweet.text, tweet.favorite_count,
                  tweet.in_reply_to_screen_name, tweet.retweeted] for tweet in alltweets]

    # Write the CSV
    with open(path + '%s_tweets.csv' % search_query, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text", "likes", "in reply to", "retweeted"])
        writer.writerows(outtweets)
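
# Note: the standard search endpoint only indexes roughly the last 7 days of
# tweets, so keyword pulls cannot reach arbitrarily far back in time.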

# Main driver code

if __name__ == '__main__':
    choice = int(input('''Do you wish to search by:
1. Twitter id
2. Keyword\n'''))
    if choice == 1:
        user_id = input("Please provide the twitter id: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        user_tweets(user_id, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % user_id)
    else:
        keyword = input("Please provide the keyword you wish to search by: ")
        num = int(input("Please provide the number of tweets you wish to extract (<3240): "))
        keyword_tweets(keyword, num)
        tweets = pd.read_csv(path + '%s_tweets.csv' % keyword)

    # Display the scraped tweets (renders as a DataFrame in a notebook cell)
    tweets