-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
-- Schema for the Wordle tweet tables.
-- To rebuild a table from scratch, run the matching statement first:
--   DROP TABLE dbo.wordleTweets;
--   DROP TABLE dbo.wordleUserDistances;

-- Tweet rows; column names mirror the fields emitted by the scraper
-- (created_at, date, timezone, username, tweet) plus derived columns.
CREATE TABLE IF NOT EXISTS dbo.wordleTweets (
    currentDate DATETIME,
    wordleId    INT NOT NULL,
    created_at  BIGINT,
    date        VARCHAR(100),
    timezone    INT,
    username    VARCHAR(100),
    tweet       VARCHAR(500),
    tweetClean  VARCHAR(100)
);

-- Pairwise comparison rows: a user, the user compared against (*_cmp),
-- their cleaned tweets, and the distance between them.
CREATE TABLE IF NOT EXISTS dbo.wordleUserDistances (
    date            VARCHAR(100),
    username        VARCHAR(100),
    username_cmp    VARCHAR(100),
    -- tweet        VARCHAR(500),   -- raw tweet columns currently disabled
    tweetClean      VARCHAR(100),
    tweetClean_cmp  VARCHAR(100),
    -- tweet_cmp    VARCHAR(500),
    distance        DOUBLE
);

COMMIT;
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Travis CI build configuration.
dist: bionic
language: python

# Interpreter versions the test suite is run against.
python: ["3.6", "3.7", "3.8", "nightly"]

# Failures on these interpreters do not fail the overall build.
matrix:
  allow_failures:
    - python: "nightly"
    - python: "3.8"

install:
  - pip install -r requirements.txt

script:
  - python test.py
deploy: | ||
provider: pypi | ||
user: "codyzacharias" | ||
password: | ||
secure: sWWvx50F7KJBtf8z2njc+Q31WIAHiQs4zKEiGD4/7xrshw55H5z+WnqZ9VIP83qm9yKefoRKp7WnaJeXZ3ulZSLn64ue45lqFozWMyGvelRPOKvZi9XPMqBA7+qllR/GseTHSGC3G5EGxac6UEI3irYe3mZXxfjpxNOXVti8rJ2xX8TiJM0AVKRrdDiAstOhMMkXkB7fYXMQALwEp8UoW/UbjbeqsKueXydjStaESNP/QzRFZ3/tuNu+3HMz/olniLUhUWcF/xDbJVpXuaRMUalgqe+BTbDdtUVt/s/GKtpg5GAzJyhQphiCM/huihedUIKSoI+6A8PTzuxrLhB5BMi9pcllED02v7w1enpu5L2l5cRDgQJSOpkxkA5Eese8nxKOOq0KzwDQa3JByrRor8R4yz+p5s4u2r0Rs2A9fkjQYwd/uWBSEIRF4K9WZoniiikahwXq070DMRgV7HbovKSjo5NK5F8j+psrtqPF+OHN2aVfWxbGnezrOOkmzuTHhWZVj3pPSpQU1WFWHo9fPo4I6YstR4q6XjNNjrpY3ojSlv0ThMbUem7zhHTRkRsSA2SpPfqw5E3Jf7vaiQb4M5zkBVqxuq4tXb14GJ26tGD8tel8u8b+ccpkAE9xf+QavP8UHz4PbBhqgFX5TbV/H++cdsICyoZnT35yiaDOELM= | ||
on: | ||
tags: true | ||
python: "3.7" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import json | ||
from kafka import KafkaConsumer | ||
|
||
if __name__ == '__main__':
    # Subscribe to the 'messages' topic on the local broker, starting from
    # the earliest available offset when no committed offset exists.
    consumer = KafkaConsumer(
        'messages',
        bootstrap_servers='localhost:9092',
        auto_offset_reset='earliest',
    )
    try:
        # Iterating the consumer blocks waiting for records; each record's
        # value is the JSON document written by the producer.
        for message in consumer:
            print(json.loads(message.value))
    finally:
        # The loop only ends via an exception or Ctrl-C; always release the
        # broker connection on the way out.
        consumer.close()
|
||
|
||
|
||
|
||
# from kafka import KafkaConsumer | ||
# import json | ||
# import os | ||
# # from pymongo import MongoClient | ||
|
||
# try: | ||
# consumer = KafkaConsumer( | ||
# "tweet", | ||
# bootstrap_servers='localhost:9092', | ||
# auto_offset_reset='earliest', | ||
# group_id="twitter_consumer" | ||
# ) | ||
|
||
# if __name__ == "__main__": | ||
# print('Starting the Consumer...') | ||
# print('Data-Scraping will take some time...') | ||
# for msg in consumer: | ||
# print(msg) | ||
# new_tweet = {"$set":json.loads(msg.value)} | ||
# # Collection.update_one(json.loads(msg.value), new_tweet, upsert=True) | ||
# #Collection.insert_one(json.loads(msg.value)) | ||
# print("Tweet = {}".format(json.loads(msg.value))) | ||
# consumer.close() | ||
|
||
# except: | ||
# consumer.close() | ||
# print("\r", end="") | ||
# exit(0) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import random | ||
import string | ||
import sys | ||
from setup import twint | ||
import nest_asyncio | ||
import pandas as pd | ||
import numpy | ||
import time | ||
from datetime import datetime, timedelta | ||
# Patch asyncio to allow nested event loops.
# NOTE(review): presumably required because twint runs its own asyncio loop
# inside twint.run.Search — confirm.
nest_asyncio.apply()

# NOTE(review): `limit` is not referenced anywhere in the visible code
# (scrape() sets c.Limit=100 directly) — confirm whether it is still needed.
limit= 5
|
||
def scrape():
    """Search Twitter for "wordle" via twint and return the recent tweets.

    Runs a twint search (retweets filtered, limit 100) with results stored
    in twint's pandas frame, then keeps only the rows whose ``date`` string
    lies in the window from 60 seconds before to 12 seconds after the
    current local time.

    Returns:
        list[dict]: one dict per kept tweet with the keys ``created_at``,
        ``date``, ``timezone``, ``username`` and ``tweet``, ordered from the
        last row of the frame to the first. Empty when the search stored no
        rows.
    """
    now = datetime.now()
    time_format = '%Y-%m-%d %H:%M:%S'
    since = (now - timedelta(seconds=60)).strftime(time_format)
    until = (now + timedelta(seconds=12)).strftime(time_format)
    print(now, since, until)

    c = twint.Config()
    c.Search = "wordle"
    c.Pandas = True
    c.Limit = 100
    c.Filter_retweets = True
    twint.run.Search(c)

    df = twint.storage.panda.Tweets_df
    # Guard against a search that stored nothing, so callers get an empty
    # list instead of a TypeError/KeyError.
    # NOTE(review): assumes twint leaves Tweets_df as None or an empty
    # container in that case — confirm against the installed twint version.
    if df is None or len(df) == 0:
        return []

    fields = ("created_at", "date", "timezone", "username", "tweet")
    columns = {name: df[name] for name in fields}

    result = []
    # Walk the frame from its last row to its first, as the original did.
    for idx in range(len(df) - 1, -1, -1):
        stamp = columns["date"][idx]
        # Plain string comparison: this relies on the frame's date strings
        # sharing the '%Y-%m-%d %H:%M:%S' layout used for since/until.
        # NOTE(review): confirm twint's date column uses that layout.
        if stamp < since or stamp > until:
            continue
        result.append({name: columns[name][idx] for name in fields})

    return result
|
||
# scrape() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import time | ||
import json | ||
import random | ||
from datetime import datetime | ||
from data_generator import scrape | ||
from kafka import KafkaProducer | ||
|
||
def json_serializer(message):
    """Return *message* encoded as a JSON document in UTF-8 bytes.

    Used as the Kafka producer's value serializer, so *message* must be
    something ``json.dumps`` accepts.
    """
    payload = json.dumps(message)
    return payload.encode('utf-8')
|
||
# Shared Kafka producer for this script: connects to the local broker and
# runs every outgoing value through json_serializer.
producer = KafkaProducer(
    value_serializer=json_serializer,
    bootstrap_servers=['localhost:9092'],
)
|
||
|
||
if __name__ == "__main__":
    # Poll loop: scrape the latest tweets, publish each one to the
    # 'messages' topic wrapped as {"tweet": ...}, then wait a minute before
    # scraping again.
    try:
        while True:
            for tweet in scrape():
                producer.send('messages', {"tweet": tweet})
            time.sleep(60)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the script; return the cursor to
        # the start of the line so the echoed "^C" is overwritten.
        print("\r", end="")
    finally:
        # Always release the broker connection. Any other exception is left
        # to propagate after this, so failures show a traceback and a
        # non-zero exit status instead of being silently swallowed.
        producer.close()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import os
import sys

# Make the local twint checkout importable ahead of any installed copy.
# The location can be overridden with the TWINT_PATH environment variable;
# when it is unset the original hard-coded path is used.
sys.path.insert(0, os.environ.get('TWINT_PATH', '/home/sumana/twint/'))
import twint
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"name":"sbt","version":"1.8.0","bspVersion":"2.1.0-M1","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-amd64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/sumana/.local/share/JetBrains/IdeaIC2022.3/Scala/launcher/sbt-launch.jar","-Dsbt.script=/home/sumana/.local/share/coursier/bin/sbt","xsbt.boot.Boot","-bsp"]} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.