Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Rule34vault] Added support #6240

Merged
merged 2 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,12 @@ Consider all listed sites to potentially be NSFW.
<td>Galleries, individual Images</td>
<td></td>
</tr>
<tr>
<td>R34 Vault</td>
<td>https://rule34vault.com/</td>
<td>Playlists, Posts, Tag Searches</td>
<td></td>
</tr>
<tr>
<td>Read Comic Online</td>
<td>https://readcomiconline.li/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
"reddit",
"redgifs",
"rule34us",
"rule34vault",
"sankaku",
"sankakucomplex",
"seiga",
Expand Down
103 changes: 103 additions & 0 deletions gallery_dl/extractor/rule34vault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://rule34vault.com/"""

from .booru import BooruExtractor
from .. import text

# Regex prefix shared by all URL patterns in this module:
# the site's hostname, preceded by an optional http:// or https:// scheme.
BASE_PATTERN = r"(?:https?://)?rule34vault\.com"


class Rule34vaultExtractor(BooruExtractor):
    """Base class for rule34vault extractors"""
    category = "rule34vault"
    root = "https://rule34vault.com"
    root_cdn = "https://r34xyz.b-cdn.net"
    filename_fmt = "{category}_{id}.{extension}"
    per_page = 100

    def _file_url(self, post):
        """Build the CDN URL of a post's media file from its ID and type"""
        post_id = post["id"]
        # type 0 is mapped to "jpg"; every other type value to "mp4"
        extension = "jpg" if post["type"] == 0 else "mp4"
        # files are grouped on the CDN by 'post_id // 1000'
        return "{}/posts/{}/{}/{}.{}".format(
            self.root_cdn, post_id // 1000, post_id, post_id, extension)

    def _prepare(self, post):
        """Normalize a post's metadata in place

        Drops the 'files' entry, parses 'created' into 'date',
        and reduces each tag object to its 'value' field.
        """
        post.pop("files", None)
        post["date"] = text.parse_datetime(
            post["created"], "%Y-%m-%dT%H:%M:%S.%fZ")
        if "tags" in post:
            post["tags"] = [t["value"] for t in post["tags"]]

    def _tags(self, post, _):
        """Merge in the full post data when 'post' carries no tags"""
        if "tags" not in post:
            post.update(self._fetch_post(post["id"]))

    def _fetch_post(self, post_id):
        """Request a single post from the API and return its decoded JSON"""
        url = "{}/api/v2/post/{}".format(self.root, post_id)
        return self.request(url).json()

    def _pagination(self, endpoint, params=None):
        """Yield all items of a paginated POST search endpoint

        'endpoint' is appended to '<root>/api'. 'params' (optional dict)
        is sent as JSON body and is updated in place with paging fields.
        """
        url = "{}/api{}".format(self.root, endpoint)

        if params is None:
            params = {}
        params["CountTotal"] = True
        # NOTE(review): 'page_start' is not defined in this class;
        # presumably provided by BooruExtractor - confirm
        params["Skip"] = self.page_start * self.per_page
        params["take"] = self.per_page

        while True:
            data = self.request(url, method="POST", json=params).json()

            items = data["items"]
            yield from items

            # Advance first, then stop as soon as the next offset reaches
            # the reported total ('>=', so an exact multiple of the page
            # size does not trigger one more, empty request). An empty
            # page also ends the iteration.
            params["Skip"] += params["take"]
            if not items or params["Skip"] >= data["totalCount"]:
                return
            if "cursor" in data:
                params["cursor"] = data["cursor"]


class Rule34vaultPostExtractor(Rule34vaultExtractor):
    """Extractor for a single rule34vault post"""
    subcategory = "post"
    archive_fmt = "{id}"
    pattern = BASE_PATTERN + r"/post/(\d+)"
    example = "https://rule34vault.com/post/399437"

    def posts(self):
        """Return a one-element tuple with the post named in the URL"""
        post_id = self.groups[0]
        post = self._fetch_post(post_id)
        return (post,)


class Rule34vaultPlaylistExtractor(Rule34vaultExtractor):
    """Extractor for the posts of a rule34vault playlist"""
    subcategory = "playlist"
    directory_fmt = ("{category}", "{playlist_id}")
    archive_fmt = "p_{playlist_id}_{id}"
    pattern = BASE_PATTERN + r"/playlists/view/(\d+)"
    example = "https://rule34vault.com/playlists/view/2"

    def metadata(self):
        """Return the playlist ID captured from the URL as metadata"""
        playlist_id = self.groups[0]
        return {"playlist_id": playlist_id}

    def posts(self):
        """Return a generator over all posts of the playlist"""
        playlist_id = self.groups[0]
        endpoint = "/v2/post/search/playlist/" + playlist_id
        return self._pagination(endpoint)


class Rule34vaultTagExtractor(Rule34vaultExtractor):
    """Extractor for rule34vault tag searches"""
    subcategory = "tag"
    directory_fmt = ("{category}", "{search_tags}")
    archive_fmt = "t_{search_tags}_{id}"
    pattern = BASE_PATTERN + r"/([^/?#]+)$"
    example = "https://rule34vault.com/TAG"

    def metadata(self):
        """Parse the search tags from the URL and return them as metadata

        Stores the list of tags in 'self.tags' for later use by posts().
        """
        # The URL separates multiple tags with '|' (percent-encoded as
        # '%7C'). Decode first and split on the decoded character;
        # splitting the decoded string on '%7C' would never match.
        self.tags = text.unquote(self.groups[0]).split("|")
        return {"search_tags": " ".join(self.tags)}

    def posts(self):
        """Return a generator over all posts matching the search tags"""
        endpoint = "/v2/post/search/root"
        # URLs use '_' where the API request uses spaces inside a tag
        params = {"includeTags": [t.replace("_", " ") for t in self.tags]}
        return self._pagination(endpoint, params)
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
"rule34" : "Rule 34",
"rule34hentai" : "Rule34Hentai",
"rule34us" : "Rule 34",
"rule34vault" : "R34 Vault",
"sankaku" : "Sankaku Channel",
"sankakucomplex" : "Sankaku Complex",
"seiga" : "Niconico Seiga",
Expand Down
41 changes: 41 additions & 0 deletions test/results/rule34vault.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import rule34vault


# Expected results for the rule34vault extractors.
# All '#pattern' regexes escape every literal dot of the CDN hostname,
# so that '.' cannot match an arbitrary character.
__tests__ = (
{
    "#url"  : "https://rule34vault.com/sfw",
    "#class": rule34vault.Rule34vaultTagExtractor,
    "#pattern": r"https://r34xyz\.b-cdn\.net/posts/\d+/\d+/\d+\.(jpg|mp4)",
    "#range"  : "1-10",
    "#count"  : 10,
},

{
    "#url"  : "https://rule34vault.com/post/486545",
    "#class": rule34vault.Rule34vaultPostExtractor,
    "#pattern"     : r"https://r34xyz\.b-cdn\.net/posts/486/486545/486545\.jpg",
    "#sha1_content": "8f53c4c9d049842d23b51fb3cf8ce11bcbe21f07",
},

{
    "#url"    : "https://rule34vault.com/post/382937",
    "#comment": "video",
    "#class"  : rule34vault.Rule34vaultPostExtractor,
    "#pattern"     : r"https://r34xyz\.b-cdn\.net/posts/382/382937/382937\.mp4",
    "#sha1_content": "b962e3e2304139767c3792508353e6e83a85a2af",
},

{
    "#url"  : "https://rule34vault.com/playlists/view/20164",
    "#class": rule34vault.Rule34vaultPlaylistExtractor,
    "#pattern": r"https://r34xyz\.b-cdn\.net/posts/\d+/\d+/\d+\.(jpg|mp4)",
    "#count"  : 55,
},

)
Loading