diff --git a/plugins/Web/plugin.py b/plugins/Web/plugin.py
index c56da8535..c4d1dfbe5 100644
--- a/plugins/Web/plugin.py
+++ b/plugins/Web/plugin.py
@@ -150,22 +150,47 @@ def noIgnore(self, irc, msg):
def getTitle(self, irc, url, raiseErrors, msg):
size = conf.supybot.protocols.http.peekSize()
- parsed_url = utils.web.urlparse(url)
- if parsed_url.netloc in ('youtube.com', 'youtu.be') \
- or parsed_url.netloc.endswith(('.youtube.com')):
- # there is a lot of Javascript before the <title>
- size = max(819200, size)
- if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'):
- # Since 2022-03, New Reddit has 'Reddit - Dive into anything' as <title>
- # on every page.
- parsed_url = parsed_url._replace(netloc='old.reddit.com')
- url = utils.web.urlunparse(parsed_url)
-
+ def url_workaround(url):
+ """Returns a new URL that should be the target of a new request,
+ or None if the request is fine as it is.
+
+ The returned URL may be the same as the parameter, in case
+ something else was changed by this function through side-effects.
+ """
+ nonlocal size
+ parsed_url = utils.web.urlparse(url)
+ self.log.debug("Checking netloc %r", parsed_url.netloc)
+ if parsed_url.netloc in ('youtube.com', 'youtu.be') \
+ or parsed_url.netloc.endswith('.youtube.com'):
+ # there is a lot of Javascript before the <title>
+ if size < 819200:
+ size = 819200
+ return url
+ else:
+ return None
+ if parsed_url.netloc in ('reddit.com', 'www.reddit.com', 'new.reddit.com'):
+ # Since 2022-03, New Reddit has 'Reddit - Dive into anything' as <title>
+ # on every page.
+ parsed_url = parsed_url._replace(netloc='old.reddit.com')
+ url = utils.web.urlunparse(parsed_url)
+ self.log.debug("Rewrite URL to %s", url)
+ return url
+
+ return None
+
+ url = url_workaround(url) or url
timeout = self.registryValue('timeout')
headers = conf.defaultHttpHeaders(irc.network, msg.channel)
try:
fd = utils.web.getUrlFd(url, timeout=timeout, headers=headers)
target = fd.geturl()
+ fixed_target = url_workaround(target)
+ if fixed_target is not None:
+ # happens when using minification services linking to one of
+ # the websites handled by url_workaround; eg. v.redd.it
+ fd.close()
+ fd = utils.web.getUrlFd(fixed_target, timeout=timeout, headers=headers)
+ target = fd.geturl()
text = fd.read(size)
response_headers = fd.headers
fd.close()
diff --git a/plugins/Web/test.py b/plugins/Web/test.py
index e8ecdff33..b35ceb5e7 100644
--- a/plugins/Web/test.py
+++ b/plugins/Web/test.py
@@ -84,6 +84,9 @@ def testtitleReddit(self):
self.assertRegexp(
'title https://www.reddit.com/r/irc/',
'Internet Relay Chat')
+ self.assertRegexp(
+ 'title https://v.redd.it/odhemxo6giud1',
+ 'Small Kitty Big Goals : MadeMeSmile')
def testTitleMarcinfo(self):
# Checks that we don't crash on 'Content-Type: text/html;'