# mobile.py
# Forked from JeffersonQin/lightnovel_epub.
import uiautomator2 as u2
from bs4 import BeautifulSoup
import cv2
import time
import numpy as np
import sys
import pickle
import os
import traceback
import click
import dominate
from utils import echo
from lightnovel import LightNovel
# Layout and matching constants. These are only defaults: the `dump` command
# overwrites every one of them from its command-line options before capturing.
# Upper bound of the usable screen band; elements that end above it are
# skipped and it is cropped off the top of full-screen image captures.
TOP_AREA_HEIGHT = 325
# Height reserved at the bottom of the screen; elements that start below
# (displayHeight - BOTTOM_AREA_HEIGHT) are skipped.
BOTTOM_AREA_HEIGHT = 200
# Two images count as equal when the average absolute grayscale difference
# between them is below this value.
IMAGE_EQUAL_THRESHOLD = 1
# Tolerance applied to an element's bounds when testing it against the
# top/bottom limits above.
SAFE_AREA_PADDING = 20
# Directory that receives captured images, pickled content lists and HTML.
DUMP_PATH = './dump'
# NOTE(review): presumably sets up echo's subroutine stack used to label log
# output -- confirm in utils.echo.
echo.init_subroutine()
echo.push_subroutine('Global')
# Connect to the device at import time (no serial or address is given, so
# uiautomator2 picks its default device).
d = u2.connect()
# Log the device info reported by uiautomator2.
echo.clog('Device Info:', d.info)
class Image:
    """
    A captured picture that has been written to disk.

    Instances are stored in the content lists next to plain strings and are
    pickled with them, so the attribute names must stay stable.
    """
    # Path of the saved image file.
    filePath: str = None
    # Shape tuple of the saved pixel array; index 0 and 1 are multiplied
    # together by callers to compare picture sizes.
    shape = None

    def __init__(self, filePath, shape):
        self.filePath = filePath
        self.shape = shape

    def __repr__(self):
        # Content lists are logged and printed as a whole; without this each
        # image would show up as an opaque "<... object at 0x...>".
        return f'{type(self).__name__}(filePath={self.filePath!r}, shape={self.shape!r})'
class EOF:
    """
    Marker object for the end of the page.

    ``get_visible_elements`` appends one instance when it reaches the element
    that follows the article body, and ``get_content`` stops scrolling once
    the last collected item is of this type. It carries no data.
    """
def rotate2landscape():
    """
    Switch the device display to landscape orientation.

    A failure is logged together with its traceback and then reported
    through ``echo.cexit``.

    :return: None
    """
    echo.push_subroutine('rotate2landscape')
    try:
        echo.clog('Rotate screen to landscape.')
        # 'l' is the orientation code passed for landscape.
        d.set_orientation('l')
    except Exception as err:
        echo.cerr(f'Error: {err!r}')
        traceback.print_exc()
        echo.cexit('ROTATE TO LANDSCAPE FAILED')
    finally:
        # Always unwind the subroutine label pushed above.
        echo.pop_subroutine()
def rotate2portrait():
    """
    Switch the device display to portrait orientation.

    A failure is logged together with its traceback and then reported
    through ``echo.cexit``.

    :return: None
    """
    echo.push_subroutine('rotate2portrait')
    try:
        echo.clog('Rotate screen to portrait.')
        # 'n' is the orientation code passed for the natural (portrait) position.
        d.set_orientation('n')
    except Exception as err:
        echo.cerr(f'Error: {err!r}')
        traceback.print_exc()
        echo.cexit('ROTATE TO PORTRAIT FAILED')
    finally:
        # Always unwind the subroutine label pushed above.
        echo.pop_subroutine()
def take_screenshot():
    """
    Take a screenshot and make its aspect agree with the reported display.

    :return: a numpy array of the screenshot, rotated 90 degrees
        counter-clockwise when it is tall while the display is reported as
        wide, or the other way round
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        echo.clog('Take a screenshot.')
        screenshot = d.screenshot(format='opencv')
        # Read the device info once so both dimensions come from the same
        # snapshot (each access to d.info is a separate device query).
        info = d.info
        display_delta = info['displayHeight'] - info['displayWidth']
        shot_delta = screenshot.shape[0] - screenshot.shape[1]
        # Opposite signs mean one is portrait-shaped and the other
        # landscape-shaped.
        if display_delta * shot_delta < 0:
            screenshot = cv2.rotate(screenshot, cv2.ROTATE_90_COUNTERCLOCKWISE)
        return screenshot
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('TAKE SCREENSHOT FAILED')
    finally:
        echo.pop_subroutine()
def _trim_black_borders(image):
    """
    Crop the all-black margin off every side of an image array.

    :param image: array indexed as (row, column, channel)
    :return: the sub-array bounded by the first and last row and column that
        contain a non-zero value
    :raises Exception: if the array holds no non-zero value at all
    """
    rows = np.flatnonzero(np.any(image, axis=(1, 2)))
    cols = np.flatnonzero(np.any(image, axis=(0, 2)))
    if rows.size == 0 or cols.size == 0:
        raise Exception('Captured image is entirely black')
    return image[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1, :]


def get_image(x1, y1, x2, y2) -> Image:
    """
    Open an on-screen image, capture it and save it into DUMP_PATH.

    The image is tapped, a screenshot is taken, 'back' is pressed, and the
    screenshot is cropped (top area first, then the surrounding black
    border) before being written as a PNG named after the current time.

    :param x1: top left x of image bound
    :param y1: top left y of image bound
    :param x2: bottom right x of image bound
    :param y2: bottom right y of image bound
    :return: Image object holding the saved file path and the array shape
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        # One device query instead of one per use.
        info = d.info
        display_width = info['displayWidth']
        display_height = info['displayHeight']
        visible_bottom = display_height - BOTTOM_AREA_HEIGHT
        # Tap 30% of the way across the image, at its vertical middle.
        click_x = x1 * 0.7 + x2 * 0.3
        click_y = (y1 + y2) / 2
        if click_y > visible_bottom or click_y < TOP_AREA_HEIGHT:
            # The middle is hidden: aim at the middle of the part of the
            # image that lies inside the visible band instead.
            if y1 < TOP_AREA_HEIGHT:
                click_y = (y2 + TOP_AREA_HEIGHT) / 2
            elif y2 > visible_bottom:
                click_y = (y1 + visible_bottom) / 2
            else:
                raise Exception('Image is not in the visible area')
        # Convert to fractions of the display size.
        # NOTE(review): relies on uiautomator2 treating values below 1 as
        # relative coordinates -- confirm against the library docs.
        click_x = click_x / display_width
        click_y = click_y / display_height
        time.sleep(1)
        echo.clog('Click at:', click_x, click_y)
        d.click(click_x, click_y)
        time.sleep(1)
        # take screenshot
        screenshot = take_screenshot()
        # go back
        echo.clog('Press back.')
        d.press('back')
        # get rid of the navigation bar part of the image
        cropped_image = screenshot[TOP_AREA_HEIGHT:, :, :]
        # crop the black border out of the image
        trimmed_image = _trim_black_borders(cropped_image)
        # save image
        image_name = os.path.join(DUMP_PATH, f'./{time.time_ns()}.png')
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(image_name, trimmed_image):
            raise Exception(f'Failed to write image to {image_name}')
        echo.clog("Image saved,", image_name)
        # return image object
        return Image(image_name, trimmed_image.shape)
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('GET IMAGE FAILED')
    finally:
        echo.pop_subroutine()
def _parse_bounds(bounds_str):
    """
    Parse a hierarchy ``bounds`` attribute of the form ``[x1,y1][x2,y2]``.

    :param bounds_str: the attribute text
    :return: tuple (x1, y1, x2, y2) of ints
    """
    top_left, bottom_right = bounds_str.strip('[]').split('][')
    x1, y1 = (int(value) for value in top_left.split(','))
    x2, y2 = (int(value) for value in bottom_right.split(','))
    return x1, y1, x2, y2


def get_visible_elements(quick_mode=False):
    """
    Get all visible elements on the screen

    The UI hierarchy is dumped and the direct children of the content
    container are walked in order. Children lying outside the band between
    TOP_AREA_HEIGHT and the bottom area (with SAFE_AREA_PADDING tolerance)
    are skipped.

    :param quick_mode: when True, images are not captured; a unique
        ``Image-Placeholder-<ns>`` string is recorded for each instead
    :return: a list of visible elements: ``content-desc`` strings for views,
        Image objects (or placeholders) for image views, and a trailing EOF
        marker when the element following the article is reached
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        contents = []
        # dump hierarchy
        xml = d.dump_hierarchy()
        soup = BeautifulSoup(xml, 'lxml')
        # select contents
        containers = soup.select('node[resource-id="android:id/content"] > node[class="android.widget.FrameLayout"] > node[class="android.widget.FrameLayout"] > node[class="android.view.View"] > node[class="android.view.View"] > node[class="android.view.View"] > node[class="android.view.View"] > node[class="android.view.View"] > node[class="android.view.View"]')
        if not containers:
            raise Exception('Content container not found in the UI hierarchy')
        contentView = containers[0].find_all('node', recursive=False)
        # The display height does not change while iterating; query it once
        # instead of once per child.
        visible_bottom = d.info['displayHeight'] - BOTTOM_AREA_HEIGHT
        for child in contentView:
            x1, y1, x2, y2 = _parse_bounds(child.attrs['bounds'])
            if y1 + SAFE_AREA_PADDING > visible_bottom \
                    or y2 - SAFE_AREA_PADDING < TOP_AREA_HEIGHT:
                continue
            # BeautifulSoup exposes "class" as a list of values.
            node_class = child.attrs['class']
            if node_class == ['android.view.View']:
                description = child.attrs['content-desc']
                # These two descriptions mark the element after the article body.
                if description == '合集帖子\n更多' or \
                        str(description).startswith('全部評論'):
                    contents.append(EOF())
                    break
                contents.append(description)
            elif node_class == ['android.widget.ImageView']:
                if quick_mode:
                    contents.append(f'Image-Placeholder-{time.time_ns()}')
                else:
                    contents.append(get_image(x1, y1, x2, y2))
        return contents
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('GET VISIBLE ELEMENTS FAILED')
    finally:
        echo.pop_subroutine()
def _screen_signature(contents):
    """
    Reduce a quick-mode element list to values that compare equal whenever
    the same screen is captured twice.

    Image placeholders embed a timestamp and non-string markers compare by
    identity, so neither can be compared directly between two captures.

    :param contents: list returned by get_visible_elements(quick_mode=True)
    :return: list with placeholders collapsed to one constant and every
        non-string item replaced by its type
    """
    signature = []
    for item in contents:
        if isinstance(item, str):
            if item.startswith('Image-Placeholder-'):
                signature.append('Image-Placeholder')
            else:
                signature.append(item)
        else:
            signature.append(type(item))
    return signature


def scroll_to_top():
    """
    scroll the page to the top

    Swipes downwards repeatedly until a swipe no longer changes what is
    visible on screen.

    :return: None
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        last_signature = None
        while True:
            d.swipe(0.5, 0.3, 0.5, 0.7, duration=0.1)
            contents = get_visible_elements(quick_mode=True)
            signature = _screen_signature(contents)
            # Comparing the raw top element never terminates when that
            # element is an image, because its placeholder is unique per call.
            if signature == last_signature:
                break
            last_signature = signature
            echo.clog('Scrolling to top... last_top:', signature[0] if signature else None)
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('SCROLL TO TOP FAILED')
    finally:
        echo.pop_subroutine()
def compare_content(content1, content2):
    """
    Compare two contents

    Strings are compared for equality. Images are read back from disk and
    count as equal when they have the same shape and their average absolute
    grayscale difference is below IMAGE_EQUAL_THRESHOLD. Items of different
    types are never equal.

    :param content1: content 1
    :param content2: content 2
    :return: True if content1 == content2, False otherwise
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        t1 = type(content1)
        t2 = type(content2)
        if t1 == str and t2 == str:
            return content1 == content2
        if t1 != t2:
            return False
        if t1 != Image:
            raise Exception('Unsupported type.')
        img1 = cv2.imread(content1.filePath)
        img2 = cv2.imread(content2.filePath)
        # cv2.imread returns None instead of raising when a file is missing
        # or unreadable; fail with the offending path rather than with an
        # AttributeError further down.
        for content, img in ((content1, img1), (content2, img2)):
            if img is None:
                raise Exception(f'Cannot read image file: {content.filePath}')
        # judge whether shape of two images are same
        if img1.shape != img2.shape:
            return False
        # convert image to gray scale
        img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
        img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
        # calculate diff
        average_diff = np.average(cv2.absdiff(img1, img2))
        echo.clog('Image Diff between', content1.filePath, 'and', content2.filePath, ":", average_diff)
        return bool(average_diff < IMAGE_EQUAL_THRESHOLD)
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('COMPARE CONTENT FAILED')
    finally:
        echo.pop_subroutine()
def get_content():
    """
    scroll the page to the bottom and return the content of the page

    After scrolling to the top, the visible elements are captured, the page
    is swiped up and the next capture is stitched onto the accumulated list
    by finding the longest run where the end of the list matches the start
    of the new capture. This repeats until the EOF marker is collected.

    :return: the content of the page, without the trailing EOF marker
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        scroll_to_top()
        time.sleep(2)
        echo.clog('Already scrolled to the top')
        contents = []
        while True:
            visible_contents = get_visible_elements()
            if contents == []:
                contents = visible_contents
            else:
                check_count = min(len(contents), len(visible_contents))
                # Try the longest candidate overlap first and shrink.
                overlap_count = 0
                for size in range(check_count, 0, -1):
                    if all(compare_content(old, new)
                           for old, new in zip(contents[-size:], visible_contents)):
                        overlap_count = size
                        break
                # Everything after the overlap is new. Testing the overlap
                # against check_count would drop these items whenever the
                # whole accumulated list overlaps a longer visible list.
                new_contents = visible_contents[overlap_count:]
                if new_contents:
                    echo.clog('New contents:', new_contents)
                    contents.extend(new_contents)
                else:
                    echo.clog('Nothing New')
            # scroll down
            d.swipe(0.5, 0.7, 0.5, 0.3, duration=0.5)
            time.sleep(2)
            echo.clog('Scrolled down.')
            # check whether the page is at the bottom; nothing may have been
            # captured yet, in which case keep scrolling
            if contents and type(contents[-1]) == EOF:
                echo.clog('Reached the end of the page.')
                break
        return contents[:-1]
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('GET CONTENT FAILED')
    finally:
        echo.pop_subroutine()
def dump_contents(contents):
    """
    dump the contents to a file

    The list is pickled into DUMP_PATH under a name derived from the current
    time in nanoseconds, with the extension ``.out``.

    :param contents: the contents to dump
    :return: None
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        file_path = os.path.join(DUMP_PATH, f'./{time.time_ns()}.out')
        # Close the handle deterministically so the data is flushed before
        # the path is reported.
        with open(file_path, 'wb+') as f:
            pickle.dump(contents, f)
        echo.clog('Dumped contents to', file_path)
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('DUMP CONTENT FAILED')
    finally:
        echo.pop_subroutine()
def load_contents(file_path):
    """
    load the contents from a file

    :param file_path: the file path of a pickle written by dump_contents
    :return: the contents
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    try:
        # SECURITY: unpickling executes code embedded in the file. Only load
        # dumps that this tool produced itself.
        with open(file_path, 'rb') as f:
            contents = pickle.load(f)
        echo.clog('Loaded contents from', file_path)
        return contents
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('LOAD CONTENT FAILED')
    finally:
        echo.pop_subroutine()
# Root click command group; subcommands register on it with `@cli.command()`.
@click.group()
def cli():
    pass
def _prompt_conflict_choice(vert_str, horz_str):
    """
    Show both versions of a conflicting text run and ask which one to keep.

    :param vert_str: text collected from the portrait capture
    :param horz_str: text collected from the landscape capture
    :return: the version the user accepted
    """
    print('==========================')
    echo.cerr('CONFLICT HAPPENED, choose which version to accept:')
    echo.clog('Portrait (Vertical) Version:', vert_str)
    echo.clog('Landscape (Horizontal) Version:', horz_str)
    while True:
        c = input('Enter "p" or "v" to accept portrait version, "l" or "h" to accept landscape version, or "q" to quit:')
        if c in ('p', 'v', 'P', 'V'):
            return vert_str
        if c in ('l', 'h', 'L', 'H'):
            return horz_str
        if c in ('q', 'Q'):
            echo.cexit('USER QUIT')
        else:
            echo.cerr('Invalid input, please try again')


def _strip_newlines(contents):
    """Return a copy of contents with '\\n' and '\\r' removed from every string item."""
    return [item.replace('\n', '').replace('\r', '') if type(item) == str else item
            for item in contents]


def _next_image_index(contents, start, side):
    """
    Find the first Image at or after position start.

    :param contents: content list to search
    :param start: index to start from
    :param side: orientation name used in the error message
    :return: index of the image
    :raises Exception: if no image is left
    """
    index = start
    while index < len(contents) and type(contents[index]) != Image:
        index += 1
    if index >= len(contents):
        raise Exception(f'No matching image left in the {side} contents')
    return index


def _merge_orientations(vert_contents, horz_contents, conflict_mode):
    """
    Merge the portrait and landscape captures into one content list.

    The landscape list drives the walk. Text from both lists is accumulated
    until the two running strings are equal, at which point one merged
    string is emitted. Images are paired in order and the one with the
    larger area is kept. When the text disagrees, the user is asked to pick
    a version if conflict_mode is on; otherwise an exception is raised.

    :param vert_contents: portrait content list (str and Image items)
    :param horz_contents: landscape content list (str and Image items)
    :param conflict_mode: whether to resolve conflicts interactively
    :return: the merged content list
    """
    vert_ptr = horz_ptr = 0
    merged = []
    vert_str = horz_str = ''
    while horz_ptr < len(horz_contents):
        this_horz = horz_contents[horz_ptr]
        if type(this_horz) == Image:
            if not horz_str.startswith(vert_str):
                if not conflict_mode:
                    raise Exception('Unexpected content')
                # Pull in the rest of the portrait text before its next image
                # so the user sees the complete run.
                image_at = _next_image_index(vert_contents, vert_ptr, 'portrait')
                vert_str += ''.join(vert_contents[vert_ptr:image_at])
                vert_ptr = image_at
                merged.append(_prompt_conflict_choice(vert_str, horz_str))
                horz_str = vert_str = ''
            # Flush text still pending in front of the image.
            if horz_str or vert_str:
                merged.append(horz_str)
                horz_str = vert_str = ''
            vert_ptr = _next_image_index(vert_contents, vert_ptr, 'portrait')
            # compare which image is larger
            this_vert = vert_contents[vert_ptr]
            if this_vert.shape[0] * this_vert.shape[1] > \
                    this_horz.shape[0] * this_horz.shape[1]:
                merged.append(this_vert)
            else:
                merged.append(this_horz)
            horz_ptr += 1
            vert_ptr += 1
        elif type(this_horz) == str:
            horz_str += this_horz
            horz_ptr += 1
            # Let the portrait text catch up with the landscape text.
            while vert_ptr < len(vert_contents) and len(vert_str) < len(horz_str):
                this_vert = vert_contents[vert_ptr]
                if type(this_vert) != str:
                    # Portrait reached an image while landscape still has text.
                    if not conflict_mode:
                        raise Exception('Unexpected content')
                    image_at = _next_image_index(horz_contents, horz_ptr, 'landscape')
                    horz_str += ''.join(horz_contents[horz_ptr:image_at])
                    horz_ptr = image_at
                    merged.append(_prompt_conflict_choice(vert_str, horz_str))
                    horz_str = vert_str = ''
                    break
                vert_str += this_vert
                vert_ptr += 1
            if conflict_mode and horz_str == '' and vert_str == '':
                continue
            if horz_str == vert_str:
                merged.append(horz_str)
                horz_str = vert_str = ''
        else:
            raise Exception('Unknown type:', type(this_horz))
    # Text that never equalised would otherwise be dropped without notice.
    if horz_str:
        if conflict_mode:
            merged.append(_prompt_conflict_choice(vert_str, horz_str))
        else:
            echo.cerr('Trailing text differs between orientations, keeping the landscape version')
            merged.append(horz_str)
    return merged


def _render_html(contents):
    """
    Render a merged content list as an HTML document.

    :param contents: list of str (one paragraph each) and Image items
    :return: the rendered HTML text
    """
    doc = dominate.document(title='HTML of LK generated by JeffersonQin/lightnovel_epub')
    with doc:
        for content in contents:
            if isinstance(content, str):
                dominate.tags.p(content)
            elif isinstance(content, Image):
                img = dominate.tags.img(src=os.path.abspath(content.filePath))
                img.attributes['style'] = 'width: 100%;'
            else:
                raise Exception('Unknown type:', type(content))
    return doc.render()


@cli.command()
@click.option('--top-area-height', default=325, help='the height of the top area')
@click.option('--bottom-area-height', default=200, help='the height of the bottom area')
@click.option('--image-equal-threshold', default=1, help='the threshold of judging whether two images are equal')
@click.option('--safe-area-padding', default=20, help='the padding of the safe area')
@click.option('--dump-path', type=click.Path(file_okay=False), default='./dump', help='directory for dumping')
@click.option('--vert-dump', type=click.Path(exists=True), default=None, help='vertical content dump file path')
@click.option('--horz-dump', type=click.Path(exists=True), default=None, help='horizontal content dump file path')
@click.option('--html-dump', type=click.Path(exists=True), default=None, help='html content dump file path')
@click.option('--conflict-mode', type=bool, default=False, help='whether to resolve conflict manually')
@click.option('--ignore-newline', type=bool, default=True, help='whether to ignore newline')
@click.option('--title', default=None, help='title of light novel')
@click.option('--authors', default=None, help='authors\' names, separated by comma (,)')
@click.option('--identifier', default=None, help='identifier of light novel')
@click.option('--cover-link', default=None, help='cover_link of light novel. cover_link can either be web link or file path. if it is not beginned with "http", it would be recognized as file path. if nothing was given, then it will use the first picture of webpage.')
@click.option('--cvt', default=None, help='OpenCC conversion configuration, used to convert between different Chinese characters. you can choose the value from "s2t", "t2s", "s2tw", "tw2s", "s2hk", "hk2s", "s2twp", "tw2sp", "t2tw", "hk2t", "t2hk", "t2jp", "jp2t", "tw2t". if nothing is provided, no conversion would be performed. for more information, please visit: https://github.com/BYVoid/OpenCC')
@click.option('--path', type=click.Path(exists=True), default='./', help='directory for saving the light novel')
def dump(top_area_height,
         bottom_area_height,
         image_equal_threshold,
         safe_area_padding,
         dump_path,
         vert_dump,
         horz_dump,
         html_dump,
         conflict_mode: bool,
         ignore_newline: bool,
         title: str,
         authors: str,
         identifier: str,
         cover_link: str,
         cvt: str,
         path: str):
    """
    Capture a page in both orientations, merge the results and write an EPUB.

    Each stage can be skipped by supplying the file an earlier run produced:
    a portrait or landscape content dump, or the final HTML.

    :param top_area_height: the height of the top area
    :param bottom_area_height: the height of the bottom area
    :param image_equal_threshold: the threshold of judging whether two images are equal
    :param safe_area_padding: the padding of the safe area
    :param dump_path: directory for dumping; created when missing
    :param vert_dump: vertical content dump file path
    :param horz_dump: horizontal content dump file path
    :param html_dump: html content dump file path
    :param conflict_mode: whether to resolve conflict manually
    :param ignore_newline: whether to ignore newline
    :param title: title of light novel
    :param authors: authors' names, separated by comma (,)
    :param identifier: identifier of light novel
    :param cover_link: cover_link of light novel, either a web link or a file path
    :param cvt: OpenCC conversion configuration; no conversion when omitted
    :param path: directory for saving the light novel
    :return: None
    """
    echo.push_subroutine(sys._getframe().f_code.co_name)
    # The capture helpers read these module-level settings.
    global TOP_AREA_HEIGHT
    global BOTTOM_AREA_HEIGHT
    global IMAGE_EQUAL_THRESHOLD
    global SAFE_AREA_PADDING
    global DUMP_PATH
    TOP_AREA_HEIGHT = top_area_height
    BOTTOM_AREA_HEIGHT = bottom_area_height
    IMAGE_EQUAL_THRESHOLD = image_equal_threshold
    SAFE_AREA_PADDING = safe_area_padding
    DUMP_PATH = dump_path
    try:
        # init directory
        os.makedirs(dump_path, exist_ok=True)
        if html_dump is not None:
            with open(html_dump, 'r', encoding='utf-8') as f:
                html_content = f.read()
        else:
            # load and dump data
            if vert_dump is not None:
                vert_contents = load_contents(vert_dump)
                echo.clog('Loaded vertical contents from', vert_dump)
                print(vert_contents)
            else:
                rotate2portrait()
                echo.clog('Please make sure that the orientation is portrait **before** entering the app. If not, please quit and re-enter the page. Press <Enter> when ready ...')
                input()
                vert_contents = get_content()
                echo.clog('Got vertical contents')
                dump_contents(vert_contents)
            if horz_dump is not None:
                horz_contents = load_contents(horz_dump)
                echo.clog('Loaded horizontal contents from', horz_dump)
                print(horz_contents)
            else:
                rotate2landscape()
                print('==========================')
                print('==========================')
                echo.clog('Please quit and re-enter the page. Press <Enter> when ready ...')
                input()
                horz_contents = get_content()
                echo.clog('Got horizontal contents')
                dump_contents(horz_contents)
            # recalculate typesettings
            echo.clog('Recalculating typesettings ...')
            if ignore_newline:
                vert_contents = _strip_newlines(vert_contents)
                horz_contents = _strip_newlines(horz_contents)
            new_contents = _merge_orientations(vert_contents, horz_contents, conflict_mode)
            dump_contents(new_contents)
            html_content = _render_html(new_contents)
            html_path = os.path.join(DUMP_PATH, f'./{time.time_ns()}.html')
            with open(html_path, 'w+', encoding='utf-8') as f:
                f.write(html_content)
            echo.clog('Dumped HTML to', html_path)
        # generate epub
        if title is None:
            title = input('Input title of light novel: ')
        if authors is None:
            authors = input('(optional) Input authors\' names, separated by comma (,): ')
        if identifier is None:
            identifier = input('(optional) Input identifier of light novel: ')
        if cover_link is None:
            cover_link = input('(optional) Input cover_link of light novel (see --help for further explanation): ')
        novel = LightNovel(source='LK 客户端', authors=authors.split(','), identifier=identifier, title=title, cover_link=cover_link)
        novel.contents = html_content
        if cvt in ["s2t", "t2s", "s2tw", "tw2s", "s2hk", "hk2s", "s2twp", "tw2sp", "t2tw", "hk2t", "t2hk", "t2jp", "jp2t", "tw2t"]:
            # The file has no top-level opencc import; import it here so the
            # package is only required when a conversion is requested.
            import opencc
            converter = opencc.OpenCC(f'{cvt}.json')
            novel.contents = converter.convert(novel.contents)
        novel.write_epub(path)
    except Exception as e:
        echo.cerr(f'Error: {repr(e)}')
        traceback.print_exc()
        echo.cexit('DUMP CONTENT FAILED')
    finally:
        echo.pop_subroutine()
# Run the click command group only when this file is executed as a script.
if __name__ == '__main__':
    cli()