diff --git a/model/twitter.proto b/model/twitter.proto
index 26cd21b..aa5eb32 100644
--- a/model/twitter.proto
+++ b/model/twitter.proto
@@ -5,10 +5,45 @@ package twitter;
 
 import "google/protobuf/timestamp.proto";
 
-
 message Tweet {
   string tweet_id = 1;
   string user_id = 2;
   string text = 3;
   google.protobuf.Timestamp tweeted_date = 4;
 }
+
+message User {
+  enum Gender {
+    FEMALE = 0;
+    MALE = 1;
+  }
+
+  string id = 1;
+  string first_name = 2;
+  string last_name = 3;
+  string email = 4;
+  Gender gender = 5;
+  google.protobuf.Timestamp created_date = 6;
+}
+
+message TweetLike {
+  string tweet_id = 1;
+  string user_id = 2;
+  google.protobuf.Timestamp created_date = 3;
+}
+
+message Comment {
+  string id = 1;
+  string tweet_id = 2;
+  string user_id = 3;
+  string text = 4;
+  google.protobuf.Timestamp commented_date = 5;
+}
+
+message UserFollow {
+  // User who is followed
+  string followed_id = 1;
+  // User who is following
+  string follower_id = 2;
+  google.protobuf.Timestamp followed_date = 3;
+}
diff --git a/model/twitter_pb2.py b/model/twitter_pb2.py
index b9e34ca..ab1653b 100644
--- a/model/twitter_pb2.py
+++ b/model/twitter_pb2.py
@@ -14,7 +14,7 @@
 from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13model/twitter.proto\x12\x07twitter\x1a\x1fgoogle/protobuf/timestamp.proto\"j\n\x05Tweet\x12\x10\n\x08tweet_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x30\n\x0ctweeted_date\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestampb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13model/twitter.proto\x12\x07twitter\x1a\x1fgoogle/protobuf/timestamp.proto\"j\n\x05Tweet\x12\x10\n\x08tweet_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x0c\n\x04text\x18\x03 \x01(\t\x12\x30\n\x0ctweeted_date\x18\x04 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\xc0\x01\n\x04User\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\nfirst_name\x18\x02 \x01(\t\x12\x11\n\tlast_name\x18\x03 \x01(\t\x12\r\n\x05\x65mail\x18\x04 \x01(\t\x12$\n\x06gender\x18\x05 \x01(\x0e\x32\x14.twitter.User.Gender\x12\x30\n\x0c\x63reated_date\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"\x1e\n\x06Gender\x12\n\n\x06\x46\x45MALE\x10\x00\x12\x08\n\x04MALE\x10\x01\"`\n\tTweetLike\x12\x10\n\x08tweet_id\x18\x01 \x01(\t\x12\x0f\n\x07user_id\x18\x02 \x01(\t\x12\x30\n\x0c\x63reated_date\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"z\n\x07\x43omment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08tweet_id\x18\x02 \x01(\t\x12\x0f\n\x07user_id\x18\x03 \x01(\t\x12\x0c\n\x04text\x18\x04 \x01(\t\x12\x32\n\x0e\x63ommented_date\x18\x05 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\"i\n\nUserFollow\x12\x13\n\x0b\x66ollowed_id\x18\x01 \x01(\t\x12\x13\n\x0b\x66ollower_id\x18\x02 \x01(\t\x12\x31\n\rfollowed_date\x18\x03 \x01(\x0b\x32\x1a.google.protobuf.Timestampb\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -24,4 +24,14 @@
   DESCRIPTOR._options = None
   _globals['_TWEET']._serialized_start=65
   _globals['_TWEET']._serialized_end=171
+  _globals['_USER']._serialized_start=174
+  _globals['_USER']._serialized_end=366
+  _globals['_USER_GENDER']._serialized_start=336
+  _globals['_USER_GENDER']._serialized_end=366
+  _globals['_TWEETLIKE']._serialized_start=368
+  _globals['_TWEETLIKE']._serialized_end=464
+  _globals['_COMMENT']._serialized_start=466
+  _globals['_COMMENT']._serialized_end=588
+  _globals['_USERFOLLOW']._serialized_start=590
+  _globals['_USERFOLLOW']._serialized_end=695
 # @@protoc_insertion_point(module_scope)
diff --git a/producer/exceptions.py b/producer/exceptions.py
new file mode 100644
index 0000000..cfb1354
--- /dev/null
+++ b/producer/exceptions.py
@@ -0,0 +1,14 @@
+class UserNotFoundError(ValueError):
+    """Raise when no user is found"""
+
+    def __init__(self, message: str):
+        self.message = message
+        super().__init__(message)
+
+
+class TweetNotFoundError(ValueError):
+    """Raise when no tweet is found"""
+
+    def __init__(self, message: str):
+        self.message = message
+        super().__init__(message)
diff --git a/producer/logger.py b/producer/logger.py
new file mode 100644
index 0000000..6184908
--- /dev/null
+++ b/producer/logger.py
@@ -0,0 +1,3 @@
+import logging
+
+logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
diff --git a/producer/model_faker.py b/producer/model_faker.py
new file mode 100644
index 0000000..b6881e7
--- /dev/null
+++ b/producer/model_faker.py
@@ -0,0 +1,182 @@
+"""This module generates random data (with model type) using Faker."""
+
+from faker import Faker
+from typing import List
+import random
+import datetime
+
+from model import twitter_pb2
+from logger import logging
+from exceptions import UserNotFoundError, TweetNotFoundError
+
+
+class FakeDataModel:
+    """Generate fake models to further produce them in Kafka topics.
+
+    Ids handed out for Users, Tweets and Comments are remembered so that
+    dependent models (Tweet, TweetLike, Comment, UserFollow) only ever
+    reference ids that were generated earlier by the same instance.
+    """
+    ID_MAX_INT = 2147483647
+
+    def __init__(self) -> None:
+        self._faker = Faker()
+
+        # List of all of the generated tweet ids so far
+        self._generated_tweet_ids: List[str] = []
+        # List of all of the generated user ids so far
+        self._generated_user_ids: List[str] = []
+        # List of all of the generated comment ids so far
+        self._generated_comment_ids: List[str] = []
+
+    def generate_tweet_model(self) -> twitter_pb2.Tweet:
+        """Return a new generated fake Tweet model.
+
+        Raises:
+            UserNotFoundError: if no User has been generated yet.
+        """
+        self._require_users("Tweet")
+
+        tweet = twitter_pb2.Tweet(
+            tweet_id=self._generate_new_tweet_id(),
+            user_id=random.choice(self._generated_user_ids),
+            text=self._faker.text(),
+        )
+        tweet.tweeted_date.FromDatetime(self._utc_now())
+
+        return tweet
+
+    def generate_user_model(self) -> twitter_pb2.User:
+        """Return a new generated fake User model."""
+        user = twitter_pb2.User(
+            id=self._generate_new_user_id(),
+            first_name=self._faker.first_name(),
+            last_name=self._faker.last_name(),
+            email=self._faker.email(),
+            gender=random.choice(
+                [twitter_pb2.User.Gender.FEMALE, twitter_pb2.User.Gender.MALE]),
+        )
+        user.created_date.FromDatetime(self._utc_now())
+
+        return user
+
+    def generate_tweetlike_model(self) -> twitter_pb2.TweetLike:
+        """Return a new generated fake TweetLike model.
+
+        This class models a Tweet liked by a User.
+
+        Raises:
+            UserNotFoundError: if no User has been generated yet.
+            TweetNotFoundError: if no Tweet has been generated yet.
+        """
+        self._require_users("TweetLike")
+        self._require_tweets("TweetLike")
+
+        tweetlike = twitter_pb2.TweetLike(
+            tweet_id=random.choice(self._generated_tweet_ids),
+            user_id=random.choice(self._generated_user_ids),
+        )
+        # The TweetLike message names its timestamp `created_date`; there is
+        # no `liked_date` field in twitter.proto.
+        tweetlike.created_date.FromDatetime(self._utc_now())
+
+        return tweetlike
+
+    def generate_comment_model(self) -> twitter_pb2.Comment:
+        """Return a new generated fake Comment model.
+
+        This class models a Comment made by a User on a Tweet.
+
+        Raises:
+            UserNotFoundError: if no User has been generated yet.
+            TweetNotFoundError: if no Tweet has been generated yet.
+        """
+        self._require_users("Comment")
+        self._require_tweets("Comment")
+
+        comment = twitter_pb2.Comment(
+            # Use the comment id pool; drawing from the tweet id pool would
+            # register this id as a Tweet that was never produced.
+            id=self._generate_new_comment_id(),
+            tweet_id=random.choice(self._generated_tweet_ids),
+            user_id=random.choice(self._generated_user_ids),
+            text=self._faker.sentence(),
+        )
+        comment.commented_date.FromDatetime(self._utc_now())
+
+        return comment
+
+    def generate_userfollow_model(self) -> twitter_pb2.UserFollow:
+        """Return a new generated fake UserFollow model.
+
+        This class models a User following another User.
+
+        Raises:
+            UserNotFoundError: if fewer than 2 Users have been generated.
+        """
+        if len(self._generated_user_ids) < 2:
+            logging.error(
+                "You need at least 2 users to model a follow. "
+                "First call creating User model 2 times.")
+            raise UserNotFoundError(
+                "You need at least 2 users to model a follow")
+
+        # One user can not follow him/her self: sample two distinct users
+        followed_id, follower_id = random.sample(
+            self._generated_user_ids, 2)
+
+        userfollow = twitter_pb2.UserFollow(
+            followed_id=followed_id,
+            follower_id=follower_id,
+        )
+        userfollow.followed_date.FromDatetime(self._utc_now())
+
+        return userfollow
+
+    def _require_users(self, model_name: str) -> None:
+        """Raise UserNotFoundError unless a User has been generated."""
+        if not self._generated_user_ids:
+            logging.error(
+                "No users are created. First you need to create a User "
+                "before creating a new %s.", model_name)
+            raise UserNotFoundError("There aren't any users created")
+
+    def _require_tweets(self, model_name: str) -> None:
+        """Raise TweetNotFoundError unless a Tweet has been generated."""
+        if not self._generated_tweet_ids:
+            logging.error(
+                "No tweets are created. First you need to create a Tweet "
+                "before creating a new %s.", model_name)
+            raise TweetNotFoundError("There aren't any tweets created")
+
+    @staticmethod
+    def _utc_now() -> datetime.datetime:
+        """Return the current time as a timezone-aware UTC datetime.
+
+        Timestamp.FromDatetime() treats a naive datetime as UTC, so a naive
+        local datetime.now() would be stored shifted by the UTC offset.
+        """
+        return datetime.datetime.now(tz=datetime.timezone.utc)
+
+    def _generate_new_id(self, generated_ids: List[str]) -> str:
+        """Generate a new unique id and record it in `generated_ids`."""
+        new_id = str(self._faker.unique.random_int(
+            max=FakeDataModel.ID_MAX_INT))
+        generated_ids.append(new_id)
+
+        return new_id
+
+    def _generate_new_tweet_id(self) -> str:
+        """Generate a new Tweet id and add that to the list of generated
+        Tweet ids"""
+        return self._generate_new_id(self._generated_tweet_ids)
+
+    def _generate_new_user_id(self) -> str:
+        """Generate a new User id and add that to the list of generated
+        User ids"""
+        return self._generate_new_id(self._generated_user_ids)
+
+    def _generate_new_comment_id(self) -> str:
+        """Generate a new Comment id and add that to the list of generated
+        Comment ids"""
+        return self._generate_new_id(self._generated_comment_ids)
diff --git a/requirements.txt b/requirements.txt
index 0c9dd0a..cfb6a49 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,9 +2,12 @@ autopep8==2.0.2
 certifi==2022.12.7
 charset-normalizer==3.1.0
 confluent-kafka==2.1.0
+Faker==18.4.0
 idna==3.4
 protobuf==4.22.1
 pycodestyle==2.10.0
+python-dateutil==2.8.2
 requests==2.28.2
+six==1.16.0
 tomli==2.0.1
 urllib3==1.26.15