From b2c3db3e24d7360de34cc0ad4cd80e623be85cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 20 Oct 2023 15:22:44 +0200 Subject: [PATCH] [bunkr] add extractor for media URLs (#4684) --- gallery_dl/extractor/bunkr.py | 34 ++++++++++++++++++++++++++++++++- test/results/bunkr.py | 36 ++++++++++++++++++++++++++++++++--- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 5509f5a87c..d5e63b22b7 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -12,6 +12,8 @@ from .. import text from urllib.parse import urlsplit, urlunsplit +BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)" + MEDIA_DOMAIN_OVERRIDES = { "cdn9.bunkr.ru" : "c9.bunkr.ru", "cdn12.bunkr.ru": "media-files12.bunkr.la", @@ -28,7 +30,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): """Extractor for bunkrr.su albums""" category = "bunkr" root = "https://bunkrr.su" - pattern = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)/a/([^/?#]+)" + pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://bunkrr.su/a/ID" def fetch_album(self, album_id): @@ -72,3 +74,33 @@ def _extract_files(self, urls): url = urlunsplit((scheme, domain, path, query, fragment)) yield {"file": text.unescape(url)} + + +class BunkrMediaExtractor(LolisafeAlbumExtractor): + """Extractor for bunkrr.su media links""" + category = "bunkr" + subcategory = "media" + root = "https://bunkrr.su" + directory_fmt = ("{category}",) + pattern = BASE_PATTERN + r"/[vi]/([^/?#]+)" + example = "https://bunkrr.su/v/FILENAME" + + def fetch_album(self, album_id): + try: + path = urlsplit(self.url).path + page = self.request(self.root + path).text + if path[1] == "v": + url = text.extr(page, '