-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathlightnovel.py
220 lines (183 loc) · 5.84 KB
/
lightnovel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import os
import sys
import traceback
import base64
from bs4 import BeautifulSoup
from ebooklib import epub
from utils import echo
from utils import downloader
from utils.checker import is_not_null, is_null
class LightNovel():
    '''
    object of light novel

    Holds the metadata and HTML contents of one novel and renders them into
    an EPUB file through `write_epub`.
    '''

    # source of light novel; interpolated into the generated "about" page
    source = ''
    # authors (Optional). Class-level default only: `__init__` always gives
    # each instance its own list so instances never share this object.
    authors = []
    # identifier (Optional)
    identifier = None
    # title of the book; also used as the name of the generated .epub file
    title = ''
    # can be either str or list
    # the format of list is [{'content': content, 'title': title}]
    contents = ''
    # cover image. if `None`, the first picture found in the contents is used.
    # `write_epub` reads it as a local file path and ignores it when that
    # path does not exist.
    # NOTE(review): the original note said a web link (starting with `http`)
    # is accepted as well; nothing in this class downloads it -- confirm
    # whether callers resolve links to files beforehand.
    cover_link = None

    def __init__(self, source: str, authors=None, identifier=None, title=None, cover_link=None):
        '''
        initialize light novel object

        :param source: source of the light novel
        :param authors: list of authors (Optional)
        :param identifier: identifier of the book (Optional)
        :param title: title of the book (Optional)
        :param cover_link: cover image path (Optional)
        '''
        self.source = source
        # Never fall back to the class-level list: it would be shared (and
        # mutated) across every instance created without `authors`.
        self.authors = authors if authors is not None else []
        if identifier is not None:
            self.identifier = identifier
        if title is not None:
            self.title = title
        if cover_link is not None:
            self.cover_link = cover_link

    @staticmethod
    def _read_bytes(file_path):
        '''read the whole file at `file_path` as bytes and close the handle'''
        with open(file_path, 'rb') as handle:
            return handle.read()

    def process_image_content(self, content, book):
        '''
        process image content

        Every `<img>` whose `src` is not a `data:` URI is read from disk,
        added to `book` under `Images/`, and has its `src` rewritten to
        `../Images/<file name>`. `data:` images stay inline and are only
        decoded so they can serve as a cover candidate.

        :param content: HTML content
        :param book: epub book that receives the image items
        :return: parsed soup (with rewritten `src`), first image bytes
                 (`None` when the content has no image)
        '''
        echo.push_subroutine(sys._getframe().f_code.co_name)
        # parse images
        try:
            soup = BeautifulSoup(content, 'lxml')
            image_tags = soup.find_all('img')
        except Exception as e:
            echo.cerr(f'Error: {repr(e)}')
            traceback.print_exc()
            # presumably terminates the program -- the code below relies on
            # `soup` / `image_tags` being defined
            echo.cexit('PARSING HTML FAILED')
        # store images
        first_image = None
        try:
            total = len(image_tags)
            for index, tag in enumerate(image_tags, start=1):
                echo.clog(f'Processing images: ({index} / {total})')
                # parse
                link = str(tag.attrs['src'])
                if link.startswith('data:'):
                    # inline image: payload is the part after `;base64,`
                    image_bytes = base64.decodebytes(link.split(';')[1].split(',')[1].encode('utf-8'))
                else:
                    file_name = os.path.basename(link)
                    # convert href so it resolves from the Text/ documents
                    tag.attrs['src'] = f'../Images/{file_name}'
                    image = epub.EpubImage()
                    image.file_name = f'Images/{file_name}'
                    image.content = self._read_bytes(link)
                    book.add_item(image)
                    image_bytes = image.content
                if first_image is None:
                    first_image = image_bytes
        except Exception as e:
            echo.cerr(f'Error: {repr(e)}')
            traceback.print_exc()
            echo.cexit('PROCESSING IMAGES FAILED')
        # balance the push at the top of this method, as `write_epub` does
        # NOTE(review): assumes push/pop maintain a subroutine stack in `echo`
        echo.pop_subroutine()
        return soup, first_image

    def write_epub(self, path: str):
        '''
        generate the ebook to `path`

        The book is written to `<path>/<title>.epub` and consists of an
        "about" page followed by one document (str contents) or one document
        per entry (list contents).

        :param path: directory that receives the generated file
        '''
        echo.push_subroutine(sys._getframe().f_code.co_name)
        echo.clog('start generating...')
        try:
            # create epub book
            book = epub.EpubBook()
            # set metadata
            if is_not_null(self.authors):
                for author in self.authors:
                    book.add_author(author=author)
            if is_not_null(self.identifier):
                book.set_identifier(self.identifier)
            book.set_title(self.title)
            book.set_language('zh-CN')
        except Exception as e:
            echo.cerr(f'Error: {repr(e)}')
            traceback.print_exc()
            echo.cexit('CREATING EPUB BOOK FAILED')
        # (title, file name, html) of every text document, in reading order
        sections = []
        first_image = None
        if isinstance(self.contents, str):
            soup, first_image = self.process_image_content(self.contents, book)
            sections.append((self.title, 'Text/lightnovel.xhtml', str(soup)))
        elif isinstance(self.contents, list):
            for number, content in enumerate(self.contents, start=1):
                soup, section_image = self.process_image_content(content['content'], book)
                # keep the first image found across all sections
                if first_image is None:
                    first_image = section_image
                sections.append((content['title'], f'Text/Section{number}.xhtml', str(soup)))
        else:
            echo.cexit('CONTENTS MUST BE STRING OR LIST')
        # set cover
        try:
            if is_not_null(self.cover_link):
                # an explicit cover that is not an existing file yields no
                # cover at all; there is no fallback to the first image
                if os.path.exists(self.cover_link):
                    book.set_cover(os.path.basename(self.cover_link), self._read_bytes(self.cover_link))
            elif first_image is not None:
                book.set_cover('cover', first_image)
        except Exception as e:
            echo.cerr(f'Error: {repr(e)}')
            traceback.print_exc()
            echo.cexit('SETTING COVER IMAGE FAILED')
        try:
            # set content
            about_text = f'<p>本书由<a href="https://github.com/JeffersonQin/lightnovel_epub">JeffersonQin/lightnovel_epub</a>工具自动生成。<br>仅供学习交流使用,禁作商业用途。</p><br><p>本书根据 {self.source} 生成</p>'
            about_content = epub.EpubHtml(title='关于本电子书', file_name='Text/about.xhtml', lang='zh-CN', content=about_text)
            pages = [about_content]
            for section_title, file_name, html in sections:
                pages.append(epub.EpubHtml(title=section_title, file_name=file_name, lang='zh-CN', content=html))
            for page in pages:
                book.add_item(page)
            # configure book
            book.toc = tuple(pages)
            # add default NCX and Nav file
            book.add_item(epub.EpubNcx())
            book.add_item(epub.EpubNav())
            book.spine = ['nav'] + pages
            # generate book
            epub.write_epub(os.path.join(path, f'{self.title}.epub'), book, {})
        except Exception as e:
            echo.cerr(f'Error: {repr(e)}')
            traceback.print_exc()
            echo.cexit('BOOK CONFIGURATION & GENERATION FAILED')
        echo.pop_subroutine()