Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
puppala-sumana committed Dec 6, 2022
1 parent e766f3c commit 2ff88f5
Show file tree
Hide file tree
Showing 393 changed files with 7,547,965 additions and 0 deletions.
27 changes: 27 additions & 0 deletions db_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Schema for the wordle tweet pipeline: one table of stored tweets and one
-- table of pairwise user/tweet distances. Both CREATE statements are guarded
-- with IF NOT EXISTS, so the script can be re-run without error.

-- drop table dbo.wordleTweets;

-- Stored tweets. created_at, date, timezone, username and tweet share their
-- names with the fields emitted by scrape() in kafka-streamGen/data_generator.py;
-- currentDate, wordleId and tweetClean are presumably filled in by a later
-- processing stage that is not visible here -- TODO confirm.
-- NOTE(review): date is stored as varchar(100), not a date/time type.
-- NOTE(review): tweetClean is varchar(100) while tweet is varchar(500);
-- confirm the cleaned text always fits.
CREATE TABLE IF NOT EXISTS dbo.wordleTweets (
currentDate datetime,
wordleId INT NOT NULL ,
created_at bigint,
date varchar(100),
timezone int,
username varchar(100),
tweet varchar(500),
tweetClean varchar(100)
);

-- Pairwise comparison rows: a user's cleaned tweet, the "_cmp" user's cleaned
-- tweet it is compared against, and a numeric distance between the two.
-- The distance metric itself is not defined in this script.
-- The raw tweet / tweet_cmp columns are intentionally left commented out.
CREATE TABLE IF NOT EXISTS dbo.wordleUserDistances (
date varchar(100),
username varchar(100),
username_cmp varchar(100),
-- tweet varchar(500),
tweetClean varchar(100),
tweetClean_cmp varchar(100),
-- tweet_cmp varchar(500),

distance double
);

-- drop table dbo.wordleUserDistances;
commit;

23 changes: 23 additions & 0 deletions kafka-streamGen/.travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Travis CI configuration for the Python package in this directory.
# Nesting restored: with every line flush-left, `matrix`, `deploy`,
# `password` and `on` parse as empty keys and their children become
# unrelated top-level settings.
dist: bionic
language: python
python:
  - "3.6"
  - "3.7"
  - "3.8"
  - "nightly"
matrix:
  # Failures on these interpreters do not fail the overall build.
  allow_failures:
    - python: "nightly"
    - python: "3.8"
install:
  - pip install -r requirements.txt
script:
  - python test.py
# Publish to PyPI, but only for tagged builds running on Python 3.7.
# NOTE(review): the PyPI user below is not the author of this commit, so this
# section looks inherited from an upstream project -- confirm it is wanted
# here, and that the encrypted password is usable from this repository.
deploy:
  provider: pypi
  user: "codyzacharias"
  password:
    secure: sWWvx50F7KJBtf8z2njc+Q31WIAHiQs4zKEiGD4/7xrshw55H5z+WnqZ9VIP83qm9yKefoRKp7WnaJeXZ3ulZSLn64ue45lqFozWMyGvelRPOKvZi9XPMqBA7+qllR/GseTHSGC3G5EGxac6UEI3irYe3mZXxfjpxNOXVti8rJ2xX8TiJM0AVKRrdDiAstOhMMkXkB7fYXMQALwEp8UoW/UbjbeqsKueXydjStaESNP/QzRFZ3/tuNu+3HMz/olniLUhUWcF/xDbJVpXuaRMUalgqe+BTbDdtUVt/s/GKtpg5GAzJyhQphiCM/huihedUIKSoI+6A8PTzuxrLhB5BMi9pcllED02v7w1enpu5L2l5cRDgQJSOpkxkA5Eese8nxKOOq0KzwDQa3JByrRor8R4yz+p5s4u2r0Rs2A9fkjQYwd/uWBSEIRF4K9WZoniiikahwXq070DMRgV7HbovKSjo5NK5F8j+psrtqPF+OHN2aVfWxbGnezrOOkmzuTHhWZVj3pPSpQU1WFWHo9fPo4I6YstR4q6XjNNjrpY3ojSlv0ThMbUem7zhHTRkRsSA2SpPfqw5E3Jf7vaiQb4M5zkBVqxuq4tXb14GJ26tGD8tel8u8b+ccpkAE9xf+QavP8UHz4PbBhqgFX5TbV/H++cdsICyoZnT35yiaDOELM=
  on:
    tags: true
    python: "3.7"
Binary file not shown.
Binary file added kafka-streamGen/__pycache__/setup.cpython-39.pyc
Binary file not shown.
45 changes: 45 additions & 0 deletions kafka-streamGen/consumer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import json
from kafka import KafkaConsumer

def main():
    """Print every JSON message on the 'messages' topic until interrupted.

    Connects to the broker at localhost:9092 and, because of
    ``auto_offset_reset='earliest'``, starts from the oldest available
    message when no committed offset exists. Each message value is decoded
    with ``json.loads`` and printed. The consumer is always closed on the
    way out, including when the user stops the script with Ctrl-C.
    """
    consumer = KafkaConsumer(
        'messages',
        bootstrap_servers='localhost:9092',
        auto_offset_reset='earliest'
    )
    try:
        for message in consumer:
            print(json.loads(message.value))
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop this endless loop; exit quietly.
        pass
    finally:
        # Release the broker connection instead of leaking it at exit.
        consumer.close()


if __name__ == '__main__':
    main()




# from kafka import KafkaConsumer
# import json
# import os
# # from pymongo import MongoClient

# try:
# consumer = KafkaConsumer(
# "tweet",
# bootstrap_servers='localhost:9092',
# auto_offset_reset='earliest',
# group_id="twitter_consumer"
# )

# if __name__ == "__main__":
# print('Starting the Consumer...')
# print('Data-Scraping will take some time...')
# for msg in consumer:
# print(msg)
# new_tweet = {"$set":json.loads(msg.value)}
# # Collection.update_one(json.loads(msg.value), new_tweet, upsert=True)
# #Collection.insert_one(json.loads(msg.value))
# print("Tweet = {}".format(json.loads(msg.value)))
# consumer.close()

# except:
# consumer.close()
# print("\r", end="")
# exit(0)
68 changes: 68 additions & 0 deletions kafka-streamGen/data_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import random
import string
import sys
from setup import twint
import nest_asyncio
import pandas as pd
import numpy
import time
from datetime import datetime, timedelta
nest_asyncio.apply()

# NOTE(review): not referenced anywhere in this file; scrape() uses its own
# hard-coded Limit of 100. Kept in case another module imports it.
limit = 5


def scrape():
    """Search twint for recent "wordle" tweets and return them as dicts.

    Runs a twint search (retweets filtered, limit 100) with pandas storage
    enabled, then keeps only rows whose ``date`` falls inside the window
    from 60 seconds before to 12 seconds after the current local time.

    Returns:
        A list of dicts with the keys ``created_at``, ``date``,
        ``timezone``, ``username`` and ``tweet``. Rows are emitted in
        reverse DataFrame order (last row first). The list is empty when
        nothing was fetched or nothing falls inside the window.
    """
    now = datetime.now()
    # The window bounds are formatted as strings because the filter below
    # compares them lexicographically with the DataFrame's "date" values.
    # Assumes twint stores "date" as '%Y-%m-%d %H:%M:%S' strings in the same
    # (local) timezone as datetime.now() -- TODO confirm.
    since = (now - timedelta(seconds=60)).strftime('%Y-%m-%d %H:%M:%S')
    until = (now + timedelta(seconds=12)).strftime('%Y-%m-%d %H:%M:%S')
    print(now, since, until)

    c = twint.Config()
    c.Search = "wordle"
    c.Pandas = True
    c.Limit = 100
    c.Filter_retweets = True
    twint.run.Search(c)

    df = twint.storage.panda.Tweets_df
    # Presumably Tweets_df can still be None when no search has stored any
    # rows; return an empty batch instead of failing on len(None).
    if df is None or len(df) == 0:
        return []

    fields = ("created_at", "date", "timezone", "username", "tweet")
    # Positional access per column: the same scalars the original label
    # lookup returned, without depending on a default RangeIndex.
    columns = {name: df[name] for name in fields}

    result = []
    for pos in range(len(df) - 1, -1, -1):
        tweet_date = columns["date"].iloc[pos]
        if tweet_date < since or tweet_date > until:
            continue
        result.append({name: columns[name].iloc[pos] for name in fields})

    return result

# scrape()
29 changes: 29 additions & 0 deletions kafka-streamGen/producer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import time
import json
import random
from datetime import datetime
from data_generator import scrape
from kafka import KafkaProducer

def json_serializer(message):
    """Encode *message* as JSON text and return it as UTF-8 bytes.

    Used as the Kafka producer's ``value_serializer``, so every message
    value goes onto the wire as a JSON document.
    """
    payload = json.dumps(message)
    return payload.encode('utf-8')

# Kafka producer used by the publishing loop below. It connects to the local
# broker and serializes every message value with json_serializer.
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=json_serializer
)


if __name__ == "__main__":
    try:
        while True:
            # Publish each freshly scraped tweet to the 'messages' topic,
            # then wait a minute before scraping again.
            for tweet in scrape():
                producer.send('messages', {"tweet": tweet})
            time.sleep(60)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the loop. KeyboardInterrupt is not
        # an Exception subclass, so it needs its own handler. The carriage
        # return moves the cursor back over the echoed "^C".
        print("\r", end="")
    finally:
        # Always close the producer, on Ctrl-C and on errors alike. Real
        # errors now propagate with a traceback and a non-zero exit status
        # instead of being swallowed and reported as success.
        producer.close()
3 changes: 3 additions & 0 deletions kafka-streamGen/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import os
import sys

# Directory containing the local twint checkout. It defaults to the original
# hard-coded location; set the TWINT_PATH environment variable to run on a
# machine where twint lives somewhere else.
# NOTE(review): a module named setup.py is easily mistaken for a packaging
# script; other modules import twint from here via `from setup import twint`.
TWINT_PATH = os.environ.get('TWINT_PATH', '/home/sumana/twint/')

# Put the checkout first on sys.path so `import twint` resolves to it.
sys.path.insert(0, TWINT_PATH)

import twint  # noqa: E402  (must follow the sys.path change above)
1 change: 1 addition & 0 deletions stream-tweets/.bsp/sbt.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name":"sbt","version":"1.8.0","bspVersion":"2.1.0-M1","languages":["scala"],"argv":["/usr/lib/jvm/java-11-openjdk-amd64/bin/java","-Xms100m","-Xmx100m","-classpath","/home/sumana/.local/share/JetBrains/IdeaIC2022.3/Scala/launcher/sbt-launch.jar","-Dsbt.script=/home/sumana/.local/share/coursier/bin/sbt","xsbt.boot.Boot","-bsp"]}
3 changes: 3 additions & 0 deletions stream-tweets/.idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions stream-tweets/.idea/.name

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions stream-tweets/.idea/codeStyles/Project.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions stream-tweets/.idea/codeStyles/codeStyleConfig.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions stream-tweets/.idea/libraries/mysql_connector_java.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 2ff88f5

Please sign in to comment.