-
Notifications
You must be signed in to change notification settings - Fork 0
/
bot.py
268 lines (233 loc) · 10.6 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
"""
TagesschauTelegramBot
Share the current episode of Tagesschau/Tagesthemen with a quick inline command.
"""
import logging
import json
import requests
from telegram.ext import Updater, InlineQueryHandler, PicklePersistence
from telegram import InlineQueryResultVideo, InputTextMessageContent, ParseMode
from lxml import etree
logging.basicConfig(
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO
)
CACHE_TIME = 10
SHOW_CONFIG = get_config()
def get_newest_episode_from_podcast_feed(podcast_url, input_message_content=False):
"""
Get the newest episode from the podcast feed and return a list of InlineQueryResultVideos
with the only member containing that episode.
Keyword arguments:
podcast_url (str): URL of the podcast where the episode should be taken from. The podcast must order its items from
most recent to oldest, because the first item is picked without any time comparisons.
input_message_content (bool): Set the optional parameter input_message_content if the result should only contain a link.
This is important if the podcast file is bigger than 20 MB because the response will fail.
"""
rss_response = requests.get(podcast_url)
root = etree.fromstring(rss_response.content)
channel = root.find("channel")
item = channel.find("item")
title = item.find("title").text
description = item.find("description").text
uid = item.find("guid").text
thumb = item.find("itunes:image", root.nsmap).attrib["href"]
enclosure = item.find("enclosure")
url = enclosure.attrib["url"]
mime_type = enclosure.attrib["type"]
if not input_message_content:
return [
InlineQueryResultVideo(
id=uid,
video_url=url,
mime_type=mime_type,
thumb_url=thumb,
title=title,
caption=description,
description=description,
)
]
content = InputTextMessageContent(f"{url}", parse_mode=ParseMode.MARKDOWN)
return [
InlineQueryResultVideo(
input_message_content=content,
id=uid,
video_url=url,
mime_type=mime_type,
thumb_url=thumb,
title=title,
caption=description,
description=description,
)
]
def get_newest_episode_from_yt_feed(yt_url):
"""
Get the newest episode from the yt feed with a specific title and return a list of
InlineQueryResultVideos with the only member containing that episode.
Keyword arguments:
yt_url (str): URL of the youtube atom feed where the episode should be taken from. The episode
will be filterd out based on the title.
"""
rss_response = requests.get(yt_url)
root = etree.fromstring(rss_response.content)
entries = root.findall("entry", root.nsmap)
for entry in entries:
vid_title = entry.find("title", entry.nsmap)
if "tagesthemen" in vid_title.text.lower():
media = entry.find("media:group", entry.nsmap)
uid = entry.find("yt:videoId", entry.nsmap).text
title = vid_title.text
thumb = media.find("media:thumbnail", media.nsmap).attrib["url"]
description = media.find("media:description", media.nsmap).text
url = entry.find("link", entry.nsmap).attrib["href"]
content = InputTextMessageContent(f"{url}", parse_mode=ParseMode.MARKDOWN)
return [
InlineQueryResultVideo(
input_message_content=content,
id=uid,
video_url=url,
mime_type="text/html",
thumb_url=thumb,
title=title,
caption=description,
description=description,
)
]
return []
def inline_query_handler(update, context):
"""
Inline query handler for the bot. Returns the appropriate shows as a result.
query_text must be in the space seperated argument form: <keyword> <quality> <send-link>. Check the README for more information.
"""
query_text = update.inline_query.query
query_id = update.inline_query.id
if not query_text:
query_text = "all"
logging.info("Query %s", query_text)
query_text = query_text.lower()
query_args = query_text.split()
keyword_arg = query_args[0]
quality_arg = query_args[1] if len(query_args) > 1 else None
input_message_content_arg = query_args[2] if len(query_args) > 2 else None
answer = []
for show in SHOW_CONFIG:
if keyword_arg in show["keywords"]:
if quality_arg:
try:
quality = quality_arg
feed = show["quality"][quality]
except KeyError:
continue
else:
quality = show["default_quality"]
feed = show["quality"][quality]
if input_message_content_arg is not None:
input_message_content = input_message_content_arg.lower() in [
"true",
"1",
"yes",
]
else:
input_message_content = show["input_message_content"]
if quality == "yt":
answer += get_newest_episode_from_yt_feed(feed)
else:
answer += get_newest_episode_from_podcast_feed(
feed, input_message_content
)
if len(answer) > 0:
logging.info(list(map(lambda x: x.video_url, answer)))
context.bot.answer_inline_query(query_id, answer, cache_time=CACHE_TIME)
def get_config():
"""
Configuration for the different show. Format:
{
"keywords": List of all possible keywords, this this show should be found with.
"quality": Dict of feeds for the different quality versions of this show. key:
quality-identifier, value: feed-url.
"default_quality": String with the default quality-indentifiert, that should be
chosen if one isnt specified. This should preferably be a feed,
that produces files < 20MB so that they can be shown in the video player.
"input_message_content": If the video result should use input_message_content to just print
the url.
This should be set to true, if no feed can get under filesize < 20MB to
allow basic functionality wihtout errors.
}
"""
# pylint: disable=C0301
SHOW_CONFIG = [
{
"keywords": ["all", "tagesschau", "schau"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/tagesschau_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/tagesschau_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/tagesschau_https/",
"webs": "https://www.tagesschau.de/export/video-podcast/webs/tagesschau_https/",
},
"default_quality": "webs",
"input_message_content": False,
},
{
"keywords": ["all", "tagesthemen", "themen"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/tagesthemen_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/tagesthemen_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/tagesthemen_https/",
"webs": "https://www.tagesschau.de/export/video-podcast/webs/tagesthemen_https/",
"yt": "https://www.youtube.com/feeds/videos.xml?channel_id=UC5NOEUbkLheQcaaRldYW5GA",
},
"default_quality": "yt",
"input_message_content": True,
},
{
"keywords": ["all", "tagesschau100", "schau100", "100"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/tagesschau-in-100-sekunden_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/tagesschau-in-100-sekunden_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/tagesschau-in-100-sekunden_https/",
},
"default_quality": "webm",
"input_message_content": False,
},
{
"keywords": ["all", "nachtmagazin", "nacht"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/nachtmagazin_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/nachtmagazin_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/nachtmagazin_https/",
"webs": "https://www.tagesschau.de/export/video-podcast/webs/nachtmagazin_https/",
},
"default_quality": "webl",
"input_message_content": True,
},
{
"keywords": ["all", "berichtausberlin", "bericht", "berlin", "bab"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/bab_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/bab_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/bab_https/",
"webs": "https://www.tagesschau.de/export/video-podcast/webs/bab_https/",
},
"default_quality": "webl",
"input_message_content": True,
},
{
"keywords": ["all", "tageschau20", "20"],
"quality": {
"webxl": "https://www.tagesschau.de/export/video-podcast/webxl/tagesschau-vor-20-jahren_https/",
"webl": "https://www.tagesschau.de/export/video-podcast/webl/tagesschau-vor-20-jahren_https/",
"webm": "https://www.tagesschau.de/export/video-podcast/webm/tagesschau-vor-20-jahren_https/",
"webs": "https://www.tagesschau.de/export/video-podcast/webs/tagesschau-vor-20-jahren_https/",
},
"default_quality": "webs",
"input_message_content": False,
},
]
return SHOW_CONFIG
if __name__ == "__main__":
with open("credentials.json") as config:
key = json.load(config)["key"]
bot_persistence = PicklePersistence(filename="bot_data", store_bot_data=True)
updater = Updater(key, persistence=bot_persistence, use_context=True)
updater.dispatcher.add_handler(InlineQueryHandler(inline_query_handler))
updater.start_polling()
updater.idle()