-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathremove-watermark.py
90 lines (73 loc) · 2.81 KB
/
remove-watermark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from skimage import io
from pdf2image import convert_from_path
import numpy as np
import os
import img2pdf
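'''
Reference code for the pixel test: https://github.com/LJSthu/Python-Remove-Watermark/blob/master/watermark.py
img2pdf, environment: run `brew install poppler` first.
Walks all jpg files in the current directory and merges them into a pdf document named after the folder.
'''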
# Directory configuration -- change these paths to match your own machine.
# Directory scanned for source PDF files (only names ending in '.pdf' are processed).
input_dir = '/Users/frank/Code/Python/from'
# Directory that receives the rebuilt PDF files.
output_dir = '/Users/frank/Code/Python/to'
# Scratch directory for the per-page .jpg images; its .jpg files are deleted after each PDF.
img_dir = '/Users/frank/Code/Python/temp'
# Select pixel point
def select_pixel(r, g, b):
    """Tell whether a pixel should be whitened.

    Args:
        r: Red channel value.
        g: Green channel value.
        b: Blue channel value.

    Returns:
        True when every channel is strictly greater than 175 and strictly
        less than 250, otherwise False.
    """
    # Channels are checked in r, g, b order and the first miss ends the scan.
    for channel in (r, g, b):
        if not 175 < channel < 250:
            return False
    return True
# Handle image matrix
def handle(matrix):
    """Whiten every light-grey pixel of an image array in place.

    A pixel is overwritten with white (255 in each of its first three
    channels) when all three of those channel values are strictly between
    175 and 250 -- the same thresholds that ``select_pixel`` applies to a
    single pixel. The test is evaluated for the whole image at once with a
    NumPy mask instead of a Python loop over every pixel.

    Args:
        matrix: NumPy array indexed as ``[row, column, channel]`` with at
            least three channels. Channels beyond the third (for example
            an alpha channel) are left untouched.

    Returns:
        The same array object that was passed in, after modification.

    Raises:
        ValueError: If ``matrix`` is not 3-dimensional or has fewer than
            three channels.
    """
    if matrix.ndim != 3 or matrix.shape[2] < 3:
        raise ValueError(
            'expected an image array of shape (height, width, >=3), got '
            + str(matrix.shape)
        )

    # View of the first three channels; no pixel data is copied here.
    rgb = matrix[..., :3]
    # True where all three channels fall inside the open interval (175, 250).
    in_range = np.all((rgb > 175) & (rgb < 250), axis=-1)
    matrix[in_range, :3] = 255
    return matrix
# Convert pdf file to jpg files
def convert_pdf_2_img(input_file):
    """Convert a PDF into cleaned JPEG images stored in ``img_dir``.

    The PDF is converted with ``convert_from_path``; each resulting image is
    turned into a NumPy array, passed through ``handle`` and saved as
    ``<n>.jpg`` in ``img_dir``, where ``n`` counts from 1. Progress is
    printed after every saved image.

    Args:
        input_file: Path of the PDF file to convert.
    """
    # Create the scratch directory if it is missing so imsave has a target.
    os.makedirs(img_dir, exist_ok=True)

    images = convert_from_path(input_file)
    img_length = len(images)
    print(os.path.basename(input_file), 'is converted to : ', img_length, ' images')

    # Numbering starts at 1; image2pdf later orders the files by this number.
    for index, page in enumerate(images, start=1):
        cleaned = handle(np.array(page))
        io.imsave(os.path.join(img_dir, str(index) + '.jpg'), cleaned)
        print(index, '/', img_length)
# Convert img files to pdf file
def image2pdf(img_path, output_path):
    """Merge the ``.jpg`` files in a directory into one PDF.

    Only entries whose name ends with ``.jpg`` are considered. Files whose
    name (without the extension) parses as an integer come first, in numeric
    order; any other ``.jpg`` files follow in alphabetical order. Entries
    that are not ``.jpg`` files are ignored, so stray files in the directory
    no longer break the numeric sort.

    Args:
        img_path: Directory containing the images.
        output_path: Destination path without extension; ``.pdf`` is
            appended to it.
    """
    def page_order(file_name):
        # Numeric stems sort by value; anything else goes after them by name.
        try:
            return (0, int(file_name[:-4]), file_name)
        except ValueError:
            return (1, 0, file_name)

    jpg_names = [entry for entry in os.listdir(img_path) if entry.endswith(".jpg")]
    list_of_img_path = [
        os.path.join(img_path, entry) for entry in sorted(jpg_names, key=page_order)
    ]

    # Convert before opening the target so that a conversion failure does
    # not leave an empty or truncated PDF behind.
    pdf_bytes = img2pdf.convert(list_of_img_path)
    with open(output_path + ".pdf", "wb") as f:
        f.write(pdf_bytes)
    print(output_path + '.pdf is done.')
# Delete all .jpg files under the directory _dir
def remove_file_in_dir(_dir):
    """Delete every entry in ``_dir`` whose name ends with ``.jpg``.

    Other entries are left alone. A start message and a completion message
    are printed around the deletion.

    Args:
        _dir: Directory to clean.
    """
    print('start to delete temp files under the ', _dir, ' ...', end=' ')
    jpg_entries = [entry for entry in os.listdir(_dir) if entry.endswith('.jpg')]
    for entry in jpg_entries:
        os.remove(os.path.join(_dir, entry))
    print('all temp jpg files are deleted!')
# main function
if __name__ == '__main__':
    # Create the output and scratch directories so the first write succeeds.
    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)
    # Images left over from an interrupted run would otherwise be merged
    # into the first PDF produced below.
    remove_file_in_dir(img_dir)

    # sorted() gives a deterministic processing order; os.listdir does not.
    for pdf_file in sorted(os.listdir(input_dir)):
        if not pdf_file.endswith('.pdf'):
            continue
        name = os.path.splitext(pdf_file)[0]
        try:
            convert_pdf_2_img(os.path.join(input_dir, pdf_file))
            print('start to merge jpg files to pdf ...')
            image2pdf(img_dir, os.path.join(output_dir, name))
        finally:
            # Always clear the scratch images, even when a step failed, so
            # they cannot leak into the next document.
            remove_file_in_dir(img_dir)