-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsimple.py
226 lines (180 loc) · 9.2 KB
/
simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
NHENTAI_BANNER ='''
███╗ ██╗██╗ ██╗███████╗███╗ ██╗████████╗ █████╗ ██╗
████╗ ██║██║ ██║██╔════╝████╗ ██║╚══██╔══╝██╔══██╗██║
██╔██╗ ██║███████║█████╗ ██╔██╗ ██║ ██║ ███████║██║
██║╚██╗██║██╔══██║██╔══╝ ██║╚██╗██║ ██║ ██╔══██║██║
██║ ╚████║██║ ██║███████╗██║ ╚████║ ██║ ██║ ██║██║
╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚═╝ ╚═╝╚═╝
██████╗ ██████╗ ██╗ ██╗███╗ ██╗██╗ ██████╗ █████╗ ██████╗ ███████╗██████╗
██╔══██╗██╔═══██╗██║ ██║████╗ ██║██║ ██╔═══██╗██╔══██╗██╔══██╗██╔════╝██╔══██╗
██║ ██║██║ ██║██║ █╗ ██║██╔██╗ ██║██║ ██║ ██║███████║██║ ██║█████╗ ██████╔╝
██║ ██║██║ ██║██║███╗██║██║╚██╗██║██║ ██║ ██║██╔══██║██║ ██║██╔══╝ ██╔══██╗
██████╔╝╚██████╔╝╚███╔███╔╝██║ ╚████║███████╗╚██████╔╝██║ ██║██████╔╝███████╗██║ ██║
╚═════╝ ╚═════╝ ╚══╝╚══╝ ╚═╝ ╚═══╝╚══════╝ ╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚══════╝╚═╝ ╚═╝
'''
print(NHENTAI_BANNER)
import re
import os
import asyncio
import traceback
import httpx
from nhentai.parser import doujinshi_parser
from nhentai.logger import logger
from nhentai.utils import format_filename
from bs4 import BeautifulSoup
def extract_manga_id(url):
"""
Extract manga ID from nhentai URL
:param url: Full URL of the manga on nhentai
:return: Manga ID as a string
"""
# Use regex to extract ID from various possible URL formats
match = re.search(r'/g/(\d+)/?', url)
if match:
return match.group(1)
# If no match found, try to use the last part of the URL
parts = url.rstrip('/').split('/')
for part in reversed(parts):
if part.isdigit():
return part
raise ValueError(f"Could not extract manga ID from URL: {url}")
def safe_format_filename(name):
"""
Safely format filename, handling None and empty string cases
:param name: Input name to format
:return: Formatted filename
"""
if not name:
return ''
try:
return format_filename(name)
except Exception:
# If format_filename fails, do a basic sanitization
return re.sub(r'[<>:"/\\|?*]', '', name).strip()
async def fetch_manga_images(manga_id):
"""
Fetch manga image URLs by scraping the gallery page
:param manga_id: ID of the manga
:return: List of image URLs with their original page numbers
"""
async with httpx.AsyncClient() as client:
# Fetch the gallery page
url = f"https://nhentai.net/g/{manga_id}/"
try:
response = await client.get(url)
# Check if request was successful
if response.status_code != 200:
print(f"Failed to fetch gallery page. Status code: {response.status_code}")
return []
# Parse the HTML
soup = BeautifulSoup(response.text, 'html.parser')
# Find image elements
image_elements = soup.select('div.thumb-container img')
# Extract unique image URLs while preserving order
image_info = []
seen_urls = set()
for img in image_elements:
# Try to get the full image URL
img_src = img.get('data-src') or img.get('src')
if img_src:
# Convert to full image URL if needed
if img_src.startswith('//'):
img_src = f"https:{img_src}"
# Ensure we get the full-size image URL
img_src = img_src.replace('/t.', '/i.')
# Extract page number from URL or use a default
try:
page_match = re.search(r'/(\d+)\.[a-z]+$', img_src)
page_num = int(page_match.group(1)) if page_match else len(image_info) + 1
except Exception:
page_num = len(image_info) + 1
# Add only if not already seen
if img_src not in seen_urls:
image_info.append((page_num, img_src))
seen_urls.add(img_src)
# Sort by page number to ensure correct order
return [url for _, url in sorted(image_info, key=lambda x: x[0])]
except Exception as e:
print(f"Error fetching manga images: {e}")
return []
async def download_images(manga_id, image_urls, download_folder):
"""
Download images for a manga
:param manga_id: ID of the manga
:param image_urls: List of image URLs
:param download_folder: Folder to save images
:return: List of downloaded file paths
"""
os.makedirs(download_folder, exist_ok=True)
async with httpx.AsyncClient() as client:
downloaded_files = []
for index, img_url in enumerate(image_urls, 1):
try:
# Determine file extension
ext = img_url.split('.')[-1]
if ext not in ['jpg', 'png', 'webp', 'gif']:
ext = 'jpg'
# Prepare filename
filename = os.path.join(download_folder, f"{index:03d}.{ext}")
# Download image
response = await client.get(img_url)
if response.status_code == 200:
with open(filename, 'wb') as f:
f.write(response.content)
downloaded_files.append(filename)
print(f"Downloaded: {filename}")
else:
print(f"Failed to download {img_url}. Status code: {response.status_code}")
except Exception as e:
print(f"Error downloading image {img_url}: {e}")
return downloaded_files
def download_manga(url):
"""
Download a manga from an nhentai URL
:param url: Full URL of the manga on nhentai
:return: Path to downloaded manga
"""
try:
# Extract manga ID from URL
manga_id = extract_manga_id(url)
# Parse the manga details
doujinshi_info = doujinshi_parser(manga_id)
# Print out the doujinshi info for debugging
print("Doujinshi Info:", doujinshi_info)
# Prepare safe name and pretty_name
name = safe_format_filename(doujinshi_info.get('name', ''))
pretty_name = safe_format_filename(doujinshi_info.get('pretty_name', ''))
# Create a download folder
download_folder = name or pretty_name or str(manga_id)
download_folder = re.sub(r'[<>:"/\\|?*]', '', download_folder).strip()
# Fetch image URLs
image_urls = asyncio.run(fetch_manga_images(manga_id))
if not image_urls:
print("No image URLs found.")
return None
# Download images
downloaded_files = asyncio.run(download_images(manga_id, image_urls, download_folder))
if downloaded_files:
print(f"Successfully downloaded {len(downloaded_files)} files to: {download_folder}")
return download_folder
else:
print("Failed to download any images.")
return None
except Exception as e:
print(f"Error downloading manga: {e}")
print("Detailed traceback:")
traceback.print_exc()
return None
def main():
# Example usage
manga_url = input("Enter the nhentai manga URL: ").strip()
downloaded_path = download_manga(manga_url)
if downloaded_path:
print(f"Manga downloaded to: {downloaded_path}")
else:
print("Failed to download manga.")
if __name__ == '__main__':
# Configure logger to show more information
logger.setLevel('DEBUG')
# Run the main function
main()