|
| 1 | +from app import config |
| 2 | +from functools import lru_cache |
| 3 | +from nltk.tokenize import word_tokenize |
| 4 | +from typing import Optional |
| 5 | +from word_forms.lemmatizer import lemmatize |
| 6 | + |
| 7 | +import re |
| 8 | + |
| 9 | + |
# File extension appended to every flair image name.
FLAIRS_EXTENSION = '.jpg'
# Directory of the flair images, located under config.STATIC_ABS_PATH.
# NOTE(review): the separator is a hard-coded backslash - confirm that this
# is intended wherever the resulting link is consumed.
FLAIRS_REL_PATH = f'{config.STATIC_ABS_PATH}\\event_flairs'
# Maps a related word or phrase to the base name (no extension) of the
# flair image that represents it. Keys are kept lower-case because every
# lookup in this module lower-cases its input first.
IMAGES_RELATED_WORDS_MAP = {
    'birthday': 'birthday',
    'coffee': 'coffee',
    'coffees': 'coffee',
    'concert': 'concert',
    'gig': 'concert',
    'concerts': 'concert',
    'gigs': 'concert',
    'bicycle': 'cycle',
    'cycling': 'cycle',
    'bike': 'cycle',
    'bicycles': 'cycle',
    'bikes': 'cycle',
    'biking': 'cycle',
    'dentist': 'dentist',
    'dentistry': 'dentist',
    'dental': 'dentist',
    'dinner': 'food',
    'dinners': 'food',
    'restaurant': 'food',
    'restaurants': 'food',
    'family meal': 'food',
    'lunch': 'food',
    'lunches': 'food',
    'luncheon': 'food',
    'cocktail': 'drank',
    'drinks': 'drank',
    'cocktails': 'drank',
    'golf': 'golf',
    'graduation': 'graduate',
    'gym': 'gym',
    'workout': 'gym',
    'workouts': 'gym',
    'haircut': 'haircut',
    'hair': 'haircut',
    'halloween': 'halloween',
    'helloween': 'halloween',
    "hallowe'en": 'halloween',
    'allhalloween': 'halloween',
    "all hallows' eve": 'halloween',
    # Was "all saints' Eve": the capital letter made the key unreachable,
    # since lookups are always lower-cased.
    "all saints' eve": 'halloween',
    'hiking': 'hike',
    'hike': 'hike',
    'hikes': 'hike',
    'kayaking': 'kayak',
    'piano': 'music',
    'singing': 'music',
    'music class': 'music',
    'choir practice': 'music',
    'flute': 'music',
    'orchestra': 'music',
    'oboe': 'music',
    'clarinet': 'music',
    'saxophone': 'music',
    'cornett': 'music',
    'trumpet': 'music',
    'contrabass': 'music',
    'cello': 'music',
    'trombone': 'music',
    'tuba': 'music',
    'music ensemble': 'music',
    'string quartett': 'music',
    'guitar lesson': 'music',
    'classical music': 'music',
    'choir': 'music',
    'manicure': 'manicure',
    'pedicure': 'manicure',
    'manicures': 'manicure',
    'pedicures': 'manicure',
    'massage': 'massage',
    'back rub': 'massage',
    'backrub': 'massage',
    'massages': 'massage',
    'pills': 'pill',
    'medicines': 'pill',
    'medicine': 'pill',
    'drug': 'pill',
    'drugs': 'pill',
    'ping pong': 'pingpong',
    'table tennis': 'pingpong',
    'ping-pong': 'pingpong',
    'pingpong': 'pingpong',
    'plan week': 'plan',
    'plan quarter': 'plan',
    'plan day': 'plan',
    'plan vacation': 'plan',
    'week planning': 'plan',
    'vacation planning': 'plan',
    'pokemon': 'pokemon',
    'reading': 'read',
    'newspaper': 'read',
    'fridge repair': 'repair',
    'handyman': 'repair',
    'electrician': 'repair',
    'diy': 'repair',
    'jog': 'ran',
    'jogging': 'ran',
    'running': 'ran',
    'jogs': 'ran',
    'runs': 'ran',
    'sail': 'sail',
    'sailing': 'sail',
    'boat cruise': 'sail',
    'sailboat': 'sail',
    'santa claus': 'santa',
    'father christmas': 'santa',
    'skiing': 'ski',
    'ski': 'ski',
    'skis': 'ski',
    'snowboarding': 'ski',
    'snowshoeing': 'ski',
    'snow shoe': 'ski',
    'snow boarding': 'ski',
    'soccer': 'soccer',
    'swim': 'swam',
    'swimming': 'swam',
    'swims': 'swam',
    'tennis': 'tennis',
    'thanksgiving': 'thanksgiving',
    'wedding': 'wed',
    'wedding eve': 'wed',
    'wedding-eve party': 'wed',
    'weddings': 'wed',
    'christmas': 'christmas',
    'xmas': 'christmas',
    'x-mas': 'christmas',
    'yoga': 'yoga',
}
| 140 | + |
| 141 | + |
def generate_flare_link_from_lemmatized_word(lemmatized_word: str) -> str:
    """Build the link of the flair image named after a lemmatized word.

    Args:
        lemmatized_word (str): Base name of the flair image, without
            its extension.

    Returns:
        str: FLAIRS_REL_PATH, a backslash, the word and FLAIRS_EXTENSION
            concatenated in that order.
    """
    image_file = f'{lemmatized_word}{FLAIRS_EXTENSION}'
    return f'{FLAIRS_REL_PATH}\\{image_file}'
| 152 | + |
| 153 | + |
def remove_non_alphabet_chars(text: str) -> str:
    """Drop every character that is not an ASCII letter.

    Digits, whitespace, punctuation and non-ASCII letters are all removed;
    the case of the remaining letters is left untouched.

    Args:
        text (str): The string to clean.

    Returns:
        str: The input reduced to its a-z / A-Z characters.
    """
    return re.sub('[^a-zA-Z]', '', text)
| 165 | + |
| 166 | + |
def get_image_name(related_word: str) -> Optional[str]:
    """Look up the image name mapped to a related word or phrase.

    The word is first reduced to its lower-cased ASCII letters, so that
    variants such as 'Ping-Pong' or "Hallowe'en" hit the 'pingpong' and
    'halloween' keys of IMAGES_RELATED_WORDS_MAP. If that finds nothing,
    the lower-cased phrase itself is tried, which makes keys containing
    spaces or punctuation (e.g. 'table tennis') reachable as well.

    Args:
        related_word (str): The word or phrase to look up.

    Returns:
        Optional[str]: The mapped image name, or None when the word is not
            a key of IMAGES_RELATED_WORDS_MAP.
    """
    shrunken = remove_non_alphabet_chars(related_word).lower()
    image_name = IMAGES_RELATED_WORDS_MAP.get(shrunken)
    if image_name is None:
        # Stripping removed the spaces/punctuation that some keys rely on;
        # retry with the phrase as written (lower-cased, outer blanks cut).
        image_name = IMAGES_RELATED_WORDS_MAP.get(related_word.strip().lower())
    return image_name
| 178 | + |
| 179 | + |
@lru_cache(maxsize=32)
def search_token_in_related_words(token: str) -> Optional[str]:
    """Resolve a token to the link of its flair image, if it has one.

    Results are memoized by the surrounding lru_cache decorator.

    Args:
        token (str): The token to look up via get_image_name.

    Returns:
        Optional[str]: The link to the image mapped to the token, or None
            when get_image_name yields no (or an empty) image name.
    """
    image_name = get_image_name(token)
    if not image_name:
        return None
    return generate_flare_link_from_lemmatized_word(image_name)
| 193 | + |
| 194 | + |
def attach_image_to_event(event_content: str) -> str:
    """Get a link to the flair image that suits an event's content.

    The content is tokenized and the tokens are checked in order. A token
    matches when its lemmatized form is one of the image names in
    IMAGES_RELATED_WORDS_MAP, or when the token itself is a related word
    according to search_token_in_related_words. The first match wins.

    Tokens are normalised (non-letters removed, lower-cased) before being
    lemmatized, so tokens that carry punctuation, such as 'x-mas', are
    considered instead of being discarded.

    Args:
        event_content (str): The event content.

    Returns:
        str: The link to the first matching image, or '#' when no token
            matches (including empty content).
    """
    # Built once per call: set membership instead of scanning the dict
    # values for every token.
    image_names = set(IMAGES_RELATED_WORDS_MAP.values())
    for token in word_tokenize(event_content):
        cleaned = remove_non_alphabet_chars(token).lower()
        if not cleaned:
            # Nothing but digits/punctuation: there is no word to look up.
            continue
        try:
            base_word = lemmatize(cleaned)
        except ValueError:
            # Fall back to the normalised token so matching stays
            # case-insensitive on this path too.
            base_word = cleaned
        if base_word in image_names:
            return generate_flare_link_from_lemmatized_word(base_word)
        link = search_token_in_related_words(token)
        if link:
            return link
    # No token matched: placeholder link.
    return '#'
0 commit comments