Skip to content
This repository has been archived by the owner on May 20, 2022. It is now read-only.

Commit

Permalink
Refactor the module
Browse files Browse the repository at this point in the history
This refactor allows us to:
- introduce entry points for a better CLI integration
- introduce unit tests
- introduce tox environment
    - run unit tests
    - run pep8 checks and linters
    - start a virtualenv to interact with tournesolbot by using
      `tox -e venv -- tournesolbot`. Useful for development sessions.
  • Loading branch information
4383 committed Sep 8, 2021
1 parent 3823c5d commit 2f5a06d
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 12 deletions.
12 changes: 5 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@ pip install -e .

## Usage

```
cd tournesolbot
python tournesolbot.py [-h] [-l 'en'/'fr'] [-a] [-d] [-m] [-r] [-t 'My tweet']
```sh
$ tournesolbot [-h] [-l 'en'/'fr'] [-a] [-d] [-m] [-r] [-t 'My tweet']

This is this help of the Tournesol-Twitter-Bot.

Expand Down Expand Up @@ -54,17 +52,17 @@ If you don't own the TournesolBot twitter account, you can use the following com
Write the daily tweet with the french twitter account:
```sh
python tournesolbot.py -l fr -a -d
tournesolbot -l fr -a -d
```
Respond to mention with the english twitter account:
```sh
python tournesolbot.py -l en -a -r
tournesolbot -l en -a -r
```
Create a list of the YouTube channels not yet associated with a Twitter account:
```sh
python tournesolbot.py -l en -m
tournesolbot -l en -m
```
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,9 @@
"Topic :: Scientific/Research",
"Topic :: Scientific/Data",
],
entry_points={
"console_scripts": [
"tournesolbot = tournesolbot.__main__:main"
]
}
)
3 changes: 3 additions & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fixtures
tox
nose
Empty file added tournesolbot/__init__.py
Empty file.
281 changes: 281 additions & 0 deletions tournesolbot/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,281 @@
import sys
import requests
import pandas as pd
import getopt

from tournesolbot.tournesol_api_functions import get_good_video, get_video_info, get_missing_channel_list
from tournesolbot.twitter_api_functions import twitter_authentication, write_tweet, write_response_tweet
from tournesolbot.data.utils_dict import ACCEPTED_LANGUAGE,YT_2_TWITTER, CRITERIA_DICT
from tournesolbot.data.utils_dict import already_answered_filepath, daily_tweet_text, video_details_tweet_text, not_found_video_tweet_text


# Parameters passed to get_good_video() and get_missing_channel_list().
# NOTE(review): presumably how far down the top-rated ranking videos are picked -- confirm
FROM_TOP = 70
# NOTE(review): presumably restricts the selection to videos from the last N days -- confirm
LAST_N_DAYS = 120

def get_top_percentage(criteria_row):
    """Return a human-readable "Top x%" label for one criterion row.

    Args:
        criteria_row: Mapping-like object (dict or pandas row) exposing the
            criterion quantile under the ``'quantile_val'`` key.  A lower
            quantile yields a better label (0.2 gives "Top 20%").

    Returns:
        ``"Not in Top 50%"`` when the quantile is 0.5 or above, otherwise
        ``"Top N%"`` where N is the quantile rounded to a whole percentage
        (never below 1).
    """
    quantile = criteria_row['quantile_val']

    if quantile >= 0.5:
        return "Not in Top 50%"

    # Very small quantiles would round down to the meaningless "Top 0%",
    # so the label never goes below 1%.
    percentage = max(1, round(quantile * 100))
    return "Top {val}%".format(val=percentage)


def get_top_criteria(video_dict):
    """Rank the criteria of a video from best to worst quantile.

    Args:
        video_dict: Video information.  It must hold a value under every
            quantile field name listed in ``CRITERIA_DICT`` (third item of
            each entry).

    Returns:
        A ``pandas.DataFrame`` with one row per criterion, sorted by
        ascending ``quantile_val`` and re-indexed from 0, with the columns
        ``en``, ``fr``, ``quantile``, ``quantile_val`` and ``top`` (the
        label produced by ``get_top_percentage``).

    Raises:
        KeyError: If ``video_dict`` lacks one of the quantile fields.
    """
    # Build fresh rows on every call.  Appending the quantile to the lists
    # stored in the module-level CRITERIA_DICT makes them grow on each call
    # and breaks the 4-column DataFrame from the second call onwards.
    rows = {}
    for criteria, values in CRITERIA_DICT.items():
        label_en, label_fr, quantile_key = values[:3]
        rows[criteria] = [
            label_en,
            label_fr,
            quantile_key,
            video_dict[quantile_key],
        ]

    df = pd.DataFrame.from_dict(
        rows,
        orient='index',
        columns=['en', 'fr', 'quantile', 'quantile_val'],
    )
    # Best (lowest) quantile first; callers read rows by position 0, 1, 2.
    df = df.sort_values(by=['quantile_val'])
    df.reset_index(drop=True, inplace=True)

    df['top'] = df.apply(get_top_percentage, axis=1)

    return df


def daily_tweet(api, language='en'):
    """Compose and post the daily video recommendation tweet.

    The video is chosen by ``get_good_video`` and described with the
    ``daily_tweet_text`` template of the requested language, filled with the
    video title, the channel (Twitter handle when known), the number of
    ratings and contributors, and the two best-ranked criteria.

    Args:
        api: Twitter API object handed over to ``write_tweet``.
        language: Key selecting the tweet template and the criteria labels.
    """
    # Get the video id for today's tweet
    video_id = get_good_video(FROM_TOP, LAST_N_DAYS, language)
    video_dict = get_video_info(video_id)

    video_name = video_dict['name']
    channel = video_dict['uploader']
    n_contributors = str(video_dict['rating_n_experts'])
    n_ratings = str(video_dict['rating_n_ratings'])

    # Only the two best criteria appear in the daily tweet
    df = get_top_criteria(video_dict)
    crit1 = df.loc[0, [language]].item()
    crit2 = df.loc[1, [language]].item()

    # Use the paired Twitter account when there is one, else the quoted
    # channel name
    if channel in YT_2_TWITTER:
        twitter_account = YT_2_TWITTER[channel]
    else:
        twitter_account = "'" + channel + "'"

    template = daily_tweet_text[language]

    # Length of everything but the title, to know how much room is left
    fields = [twitter_account, n_ratings, n_contributors, crit1, crit2, video_id]
    tweet_len_no_title = sum(len(part) for part in template) + \
        sum(len(field) for field in fields)

    # 272 because:
    #   emoji count as 2 characters
    #   + 2 characters for the YouTube link
    #   + a small safety margin in case of emoji in the title
    car_to_del = 272 - tweet_len_no_title - len(video_name)
    if car_to_del < 0:
        # Also make room for the ellipsis appended to the shortened title
        car_to_del -= 3
        video_name = video_name[:car_to_del] + '...'

    # Create the tweet
    tweet = template[0] + video_name + \
        template[1] + twitter_account + \
        template[2] + n_ratings + \
        template[3] + n_contributors + \
        template[4] + crit1 + \
        template[5] + crit2 + \
        template[6] + video_id

    # Tweet it
    write_tweet(api, tweet, language, video_id)


def get_video_id_from_tweet(tweet_text):
    """Look for a YouTube link in a tweet and return the video id.

    Three kinds of link are recognised, in this order: a regular
    ``youtube.com/watch?v=<id>`` link, a short ``youtu.be/<id>`` link, and
    any other ``http`` link (e.g. ``https://t.co/...``), which is resolved
    through its redirect ``location`` header.

    Args:
        tweet_text: Full text of the tweet.

    Returns:
        The video id, or an empty string when no video id could be found.
    """
    # Split on any whitespace so a link placed right after a newline is
    # still isolated as its own token.
    tokens = tweet_text.split()

    def first_token_with(marker):
        # First whitespace-delimited token containing `marker`, as a string
        return next((token for token in tokens if marker in token), '')

    # Get the YouTube link from the tweet
    if 'youtube.com' in tweet_text:
        # Normal YouTube link
        link = first_token_with('youtube.com')

    elif 'youtu.be' in tweet_text:
        # Short link in the form "youtu.be/lG4VkPoG3ko"
        link = first_token_with('youtu.be')

    elif 'http' in tweet_text:
        # Shortened link in the form "https://t.co/xoP1b1DwOs"
        token = first_token_with('http')
        short_link = token[token.find('http'):]

        try:
            # The timeout prevents an unresponsive host from hanging the bot
            link = requests.head(short_link, timeout=10).headers['location']
        except (requests.RequestException, KeyError):
            # Request failure, or no redirect target in the response
            print('The shorten link could not be used.')
            link = ''
    else:
        link = ''
        print('No link found in this tweet.')

    # Get the video id from the link
    if 'youtube.com' in link and 'watch?v=' in link:
        video_id = link.split('watch?v=')[1].split('&')[0].split('#')[0]

    elif 'youtu.be' in link:
        # Drop a possible query string such as "?t=10"
        video_id = link.split('/')[-1].split('?')[0]

    else:
        video_id = ''
        print('No YouTube video ID has been found in this tweet!')

    return video_id


def respond_to_mention(api, language='en'):
    """Answer the tweets of the mentions timeline that reference a video.

    For every mention not listed in the already-answered file:

    * when no video id is found in the tweet, the tweet id is appended to
      the already-answered file and nothing is tweeted;
    * when ``get_video_info`` returns data, a reply with the rating counts
      and the three best criteria (with their "Top x%" label) is built;
    * otherwise a "video not found" reply is built.

    Replies are sent through ``write_response_tweet``.

    Args:
        api: Twitter API object providing ``mentions_timeline``.
        language: Key selecting the reply templates, the criteria labels and
            the already-answered file.
    """
    print('Respond to mention in which TournesolBot was mentioned.')

    # Get the already answered tweet ids.  Blank lines (e.g. a trailing
    # newline added by an editor) are skipped instead of crashing on
    # int(''); a set keeps the membership test cheap.
    with open(already_answered_filepath[language], "r") as file:
        already_answered = {int(line) for line in file if line.strip()}

    # Get the mentions in the timeline
    mentions = api.mentions_timeline(tweet_mode='extended')

    for mention in mentions:

        tweet_id = mention.id
        tweet_text = mention.full_text
        tweet_user = mention.user.screen_name

        print('\n-------------------------------------------------')
        print('Tweet id:', tweet_id)
        print('from:', tweet_user)
        print('text:', tweet_text)

        # Skip tweets that have already been answered
        if tweet_id in already_answered:
            print('Already answered to this tweet!')
            continue

        video_id = get_video_id_from_tweet(tweet_text)

        if not video_id:
            print('Do not need to respond to this tweet.')

            # Record the id so this tweet is not examined again next time
            with open(already_answered_filepath[language], 'a') as file:
                file.write(f'{tweet_id}\n')
            print('The tweet id has been added in the list of already answered tweet.')
            continue

        video_dict = get_video_info(video_id)

        if video_dict:

            # Get video main info
            video_name = video_dict['name']
            channel = video_dict['uploader']
            n_contributors = video_dict['rating_n_experts']
            n_ratings = video_dict['rating_n_ratings']

            print('Video found in the tweet:')
            print('Title:', video_name)
            print('Channel:', channel)

            # Get the 3 top criteria and their "Top x%" label
            df = get_top_criteria(video_dict)
            crit1 = df.loc[0, [language]].item() + f" ({df.loc[0,['top']].item()})"
            crit2 = df.loc[1, [language]].item() + f" ({df.loc[1,['top']].item()})"
            crit3 = df.loc[2, [language]].item() + f" ({df.loc[2,['top']].item()})"

            # Create the tweet
            template = video_details_tweet_text[language]
            tweet = template[0] + tweet_user + \
                template[1] + str(n_ratings) + \
                template[2] + str(n_contributors) + \
                template[3] + crit1 + \
                template[4] + crit2 + \
                template[5] + crit3

        else:
            # Create the tweet for a video that was not found
            tweet = not_found_video_tweet_text[language][0] + tweet_user + \
                not_found_video_tweet_text[language][1]

        # Tweet the response
        write_response_tweet(api, tweet, language, tweet_id)


def print_help():
    """Print the command-line usage of the bot on standard output.

    The usage line names the ``tournesolbot`` console script, which is how
    this module is exposed once the package is installed.
    """
    help_lines = (
        "Usage: tournesolbot [-h] [-l 'en'/'fr'] [-a] [-d] [-m] [-r] [-t 'My tweet'] ",
        "\nThis is this help of the Tournesol-Twitter-Bot.",
        "\nRequested arguments:",
        "\n-l\tselect the language that will be use to tweet and for the other functions.",
        "\nOptional arguments:",
        "\n-h\tshow this help message and exit",
        "-a\tauthentication to the Twitter account (access required!)",
        "-d\tmake the daily recommandation tweet",
        "-m\tget the missing twitter account to fill the 'YT_2_TWITTER' dictionnary.",
        "-r\trespond to tweets in which Tournesol-Bot has been mentioned.",
        "-t\ttweet the corresponding string (e.g. 'My tweet').\n",
    )
    # One entry per output line; leading/trailing "\n" produce blank lines
    print("\n".join(help_lines))


def main():
    """Command-line entry point of the Tournesol Twitter bot.

    Options are read from ``sys.argv``.  The help flag and the language are
    resolved first, so the language may appear anywhere on the command line;
    the actions (``-a``, ``-d``, ``-t``, ``-m``, ``-r`` and their long forms)
    are then executed in the order they were given.

    Exits with status 2 (after printing the help) when no argument is given,
    when an option is invalid or when no language is selected.

    Raises:
        ValueError: If the selected language is not in ``ACCEPTED_LANGUAGE``.
    """
    # Empty api in the case no authentication is used
    api = ''

    # Get command line arguments
    argv = sys.argv[1:]
    if not argv:
        print_help()
        sys.exit(2)

    try:
        # "language=" and "tweet=" take a value, like their short forms
        opts, _args = getopt.getopt(
            argv,
            "l:admrt:h",
            ["language=", "authentication", "daily", "missing",
             "respond", "tweet=", "help"],
        )
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    # Help wins over everything else so that it can never trigger a tweet
    if any(opt in ("-h", "--help") for opt, _arg in opts):
        print_help()
        sys.exit()

    # Resolve the language before running any action: every action needs it,
    # whatever the position of -l on the command line.
    language = None
    for opt, arg in opts:
        if opt in ("-l", "--language"):
            language = arg.strip()
            if language not in ACCEPTED_LANGUAGE:
                raise ValueError(f"Language not recognize! only {list(ACCEPTED_LANGUAGE.keys())} are valid.")

    if language is None:
        print_help()
        sys.exit(2)

    print('The selected language is: ', ACCEPTED_LANGUAGE[language])

    # Run the actions in command-line order (authenticate before tweeting)
    for opt, arg in opts:
        if opt in ("-a", "--authentication"):
            api = twitter_authentication(language)

        elif opt in ("-d", "--daily"):
            daily_tweet(api, language)

        elif opt in ("-t", "--tweet"):
            write_tweet(api, arg, language, '')

        elif opt in ("-m", "--missing"):
            get_missing_channel_list(FROM_TOP, LAST_N_DAYS, language)

        elif opt in ("-r", "--respond"):
            respond_to_mention(api, language)


if __name__ == '__main__':
    main()
Empty file added tournesolbot/tests/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions tournesolbot/tests/test_tournesolbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# -*- encoding: utf-8 -*-
import unittest

# get_top_percentage is defined in tournesolbot/__main__.py; the package
# __init__.py is empty, so the name cannot be imported from the package root.
from tournesolbot.__main__ import get_top_percentage


class TestGetTopPercentage(unittest.TestCase):
    """Unit tests for the "Top x%" label helper."""

    def test_get_top_percentage(self):
        criteria = {'quantile_val': 0.2}
        result = get_top_percentage(criteria)
        self.assertEqual("Top 20%", result)

    def test_quantile_at_half_is_not_in_top(self):
        # 0.5 is the first value excluded from the "Top" labels
        result = get_top_percentage({'quantile_val': 0.5})
        self.assertEqual("Not in Top 50%", result)

    def test_quantile_above_half_is_not_in_top(self):
        result = get_top_percentage({'quantile_val': 0.9})
        self.assertEqual("Not in Top 50%", result)
2 changes: 1 addition & 1 deletion tournesolbot/tournesol_api_functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import requests
import pandas as pd

from data.utils_dict import YT_2_TWITTER, already_shared_filepath
from tournesolbot.data.utils_dict import YT_2_TWITTER, already_shared_filepath


def remove_already_tweeted_videos_and_channels(df, language='en'):
Expand Down
10 changes: 7 additions & 3 deletions tournesolbot/tournesolbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import pandas as pd
import getopt

from tournesol_api_functions import get_good_video, get_video_info, get_missing_channel_list
from twitter_api_functions import twitter_authentication, write_tweet, write_response_tweet
from tournesolbot.tournesol_api_functions import get_good_video, get_video_info, get_missing_channel_list
from tournesolbot.twitter_api_functions import twitter_authentication, write_tweet, write_response_tweet
from data.utils_dict import ACCEPTED_LANGUAGE,YT_2_TWITTER, CRITERIA_DICT
from data.utils_dict import already_answered_filepath, daily_tweet_text, video_details_tweet_text, not_found_video_tweet_text

Expand Down Expand Up @@ -232,7 +232,7 @@ def print_help():
print("-t\ttweet the corresponding string (e.g. 'My tweet').\n")


if __name__ == '__main__':
def main():

# Empty api in the case no autentification is used
api = ''
Expand Down Expand Up @@ -275,3 +275,7 @@ def print_help():

if opt == '-r':
respond_to_mention(api,language)


if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion tournesolbot/twitter_api_functions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys
import tweepy

from data.utils_dict import already_shared_filepath, already_answered_filepath
from tournesolbot.data.utils_dict import already_shared_filepath, already_answered_filepath


def twitter_authentication(language='en'):
Expand Down
Loading

0 comments on commit 2f5a06d

Please sign in to comment.