|
# This code searches for tweets with a particular keyword and writes selected fields into a CSV file.
| 2 | + |
| 3 | +import sys, csv |
| 4 | +import twitter |
| 5 | +import os |
| 6 | +import tweepy |
| 7 | + |
# Authenticate with application-only auth (AppAuthHandler) rather than
# OAuthHandler: app-only search rate limits are ~2.5x higher.
# SECURITY: the key/secret were previously hard-coded here (and therefore
# leaked). They now come from the environment, falling back to the old
# literals so existing setups keep working — rotate those keys and set
# TWITTER_API_KEY / TWITTER_API_SECRET instead.
auth = tweepy.AppAuthHandler(
    os.environ.get('TWITTER_API_KEY', 'j2UAZfXuk6iitAjnLjbFcmn0y'),
    os.environ.get('TWITTER_API_SECRET',
                   'Q9X7g4eAhyElO8u5VI183QwRCUF1sXrZs8m9poGt6Q1pmN4cOw'))
# NOTE(review): wait_on_rate_limit_notify was removed in tweepy 4.x; this
# call assumes tweepy 3.x — confirm the installed version.
api = tweepy.API(auth, wait_on_rate_limit=True,
                 wait_on_rate_limit_notify=True)


# Best-effort sanity check; tweepy.API() always returns an object, so this
# only catches the (unlikely) falsy case.
if not api:
    print("Can't Authenticate")
    sys.exit(-1)
def clean(val):
    """Return *val* as text suitable for csv.writer, or '' when falsy.

    Fixes two defects in the original:
    - a local variable shadowed the function name ``clean``;
    - it returned ``val.encode('utf-8')`` (bytes), which under Python 3
      makes ``csv.writer`` emit ``b'...'`` reprs into the output file.
      Returning the string itself writes the text correctly.
    """
    if not val:
        return ""
    return val
| 23 | + |
searchQuery = ''      # hashtag(s)/keywords to search for, comma-separated
maxTweets = 80000     # overall cap on tweets to download
tweetsPerQry = 100    # the max the Search API permits per request
fName = 'myfile.csv'  # CSV file where the tweets will be stored

# If results from a specific ID onwards are required, set sinceId to that ID;
# None means no lower limit (go as far back as the API allows).
sinceId = None

# If only results below a specific ID are required, set max_id to that ID;
# -1 means no upper limit (start from the most recent matching tweet).
max_id = -1
tweetCount = 0

# Open the output file exactly once. The original opened it twice in 'w'
# mode (truncating both times) with the csv writer bound to the first
# handle, risking corrupt/interleaved output. newline='' is required by the
# csv module to avoid blank rows on Windows.
with open(fName, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    while tweetCount < maxTweets:
        try:
            # Build the query kwargs incrementally instead of the original
            # four-way nested if/else; the branching is equivalent.
            query_kwargs = {'q': searchQuery, 'count': tweetsPerQry}
            if sinceId:
                query_kwargs['since_id'] = sinceId
            if max_id > 0:
                query_kwargs['max_id'] = str(max_id - 1)
            new_tweets = api.search(**query_kwargs)

            if not new_tweets:
                print("No more tweets found")
                break
            for tweet in new_tweets:
                csvwriter.writerow([
                    tweet.created_at,
                    clean(tweet.user.screen_name),
                    clean(tweet.text),
                    tweet.user.created_at,
                    tweet.user.followers_count,
                    tweet.user.friends_count,
                    tweet.user.statuses_count,
                    clean(tweet.user.location),
                    tweet.user.geo_enabled,
                    tweet.user.lang,
                    clean(tweet.user.time_zone),
                    tweet.retweet_count,
                ])
            tweetCount += len(new_tweets)
            # Page backwards: the next request fetches tweets strictly older
            # than the oldest one just received.
            max_id = new_tweets[-1].id
        except Exception as e:
            # The original printed the error and then `pass`ed, which loops
            # forever on a persistent failure. Break instead, as the original
            # comment ("Just exit if any error") intended.
            print("some error : " + str(e))
            break

print("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))
0 commit comments