-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathmain.py
173 lines (132 loc) · 6.42 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import os
import re
import random
import string
import urllib.request
import time
import logging
# Creates a folder inside "location" to store the downloaded pictures and the
# modified markdown files (whose image links point to the local copies)
class FolderCreator:
    def __init__(self, location=".."):
        """Remember the parent directory in which folders will be created.

        location: path of the parent directory; defaults to "..", i.e. the
        parent of the current working directory.
        """
        self.location = location

    def create_folder(self, name):
        """Create the folder "name" under self.location if it is missing.

        The requested name is stored in self.name and the resulting path in
        self.folder. Missing intermediate directories are created as well,
        and an already existing directory is left untouched.

        Raises FileExistsError if the path exists but is not a directory,
        and OSError for other file-system failures.
        """
        self.name = name
        self.folder = os.path.join(self.location, self.name)
        # makedirs(exist_ok=True) replaces the former "exists? then mkdir"
        # pair: no window between the check and the creation, and nested
        # names such as "a/b" work too.
        os.makedirs(self.folder, exist_ok=True)
# Writes the content ("filedata") of each modified file, named "filename",
# into the folder "folder_path"
class FileWritter:
    def write_file(self, folder_path, filename, filedata):
        """Write "filedata" as UTF-8 text to "filename" inside "folder_path".

        An existing file with the same name is overwritten. The three
        arguments are also kept on the instance (self.folder_path,
        self.filename, self.filedata).

        Raises OSError if the file cannot be created or written.
        """
        self.folder_path = folder_path
        self.filename = filename
        self.filedata = filedata
        # os.path.join picks the right separator for the running platform;
        # a hard-coded "\\" only produces a valid path on Windows.
        target_path = os.path.join(self.folder_path, self.filename)
        with open(target_path, "w", encoding="utf-8") as file:
            file.write(self.filedata)
# Downloads the images from the links obtained from the markdown files into
# the destination folder.
# The user-agent can be specified in order to circumvent some simple potential
# connection blocks from the sources of the images
class ImgDownloader:
    def download_images(self, url_dict, folder_path, user_agent, max_delay=2):
        """Download every URL of "url_dict" into "folder_path".

        url_dict: mapping of image URL -> local file name to save it under.
        folder_path: existing directory that receives the files.
        user_agent: value sent as the User-agent header of each request.
        max_delay: upper bound (whole seconds, >= 0) of the random pause
            made after each download attempt; 0 disables the pause.

        A failed download is logged with its traceback and skipped, so one
        broken link does not stop the remaining downloads.
        """
        self.url_dict = url_dict
        self.folder_path = folder_path
        self.user_agent = user_agent
        if not self.url_dict:
            return

        # urlretrieve() goes through the globally installed opener, so the
        # custom header is set up once here instead of once per image.
        opener = urllib.request.build_opener()
        opener.addheaders = [("User-agent", self.user_agent)]
        urllib.request.install_opener(opener)

        for url, name in self.url_dict.items():
            save_name = os.path.join(self.folder_path, name)
            try:
                urllib.request.urlretrieve(url, save_name)
            except Exception:
                # Deliberately broad: any failure on one image is recorded
                # and the loop moves on to the next one.
                logging.exception("Error when downloading %s", url)
            # Random pause between requests
            time.sleep(random.randint(0, max_delay))
# Opens the received file, reads it and returns its content
class FileOpener:
    def open_and_read(self, filename):
        """Return the UTF-8 text of "filename", or None when reading fails.

        "filename" is resolved against the current working directory. A
        successful open is announced on stdout and in the log; any failure
        is logged with its traceback and None is returned instead of
        raising.
        """
        self.url_dict = {}
        self.filename = filename
        try:
            source_path = os.path.join(os.getcwd(), filename)
            with open(source_path, mode="r", encoding="utf-8") as handle:
                # Keep the handle reachable on the instance
                self.current_opened_file = handle
                print(f"\nOpened file: {self.filename}")
                logging.info(f"Opened file: {self.filename}\n")
                content = handle.read()
        except Exception:
            logging.exception(f"Error when opening file {self.filename}")
            return None
        return content
# Finds (via regex) the image URLs in the received "file_data" and builds a
# dictionary whose keys are the URLs (for the later download) and whose values
# are 10 random hexadecimal characters followed by the image name
# (something.jpg), used to save the files later and prevent name collisions
class UrlDictCreator:
    def create(self, regex, file_data, file_name):
        """Map every distinct image URL in "file_data" to a local file name.

        regex: pattern with (at least) two capture groups; the first must
            capture the whole URL and the second the image file name.
        file_data: text to scan.
        file_name: name of the scanned file; only stored on the instance.

        Returns the dictionary URL -> random prefix + image name. Each URL
        appears once, keeping the name assigned on its first occurrence.
        If the search fails (for example because "file_data" is None) the
        error is logged and the entries collected so far are returned.
        """
        self.file_name = file_name
        self.url_dict = {}
        self.regex = regex
        self.file_data = file_data
        try:
            for match in re.findall(self.regex, self.file_data):
                url, image_name = match[0], match[1]
                if url in self.url_dict:
                    # Already seen: keep the first name
                    continue
                self.random_name = "".join(
                    random.choice(string.hexdigits) for _ in range(10)
                )
                self.url_dict[url] = self.random_name + image_name
        except Exception:
            # "except Exception" (not a bare except) so that Ctrl+C and
            # interpreter shutdown are not swallowed along with real errors.
            logging.exception("Error when trying to search url's and add them to dicionary")
        return self.url_dict
# Edits the markdown content, replacing each URL with the name of the local
# image file that will be downloaded later
class FileDataEditor:
    def edit(self, file_data, url_dict, file_name):
        """Return "file_data" with every key of "url_dict" replaced by its value.

        file_data: text of the markdown file.
        url_dict: mapping of URL -> local image file name.
        file_name: name of the file being edited; used only in the messages.

        Every replacement is reported on stdout and in the log. With an
        empty "url_dict" the text is returned unchanged.
        """
        self.file_name = file_name
        self.url_dict = url_dict
        self.file_data = file_data
        # Longest URLs first: if one URL is the beginning of another,
        # replacing the short one first would rewrite part of the long one
        # and leave it pointing at a name that is never downloaded.
        for key in sorted(self.url_dict, key=len, reverse=True):
            value = self.url_dict[key]
            self.file_data = self.file_data.replace(key, value)
            print(f"\nreplaced: {key}\nwith {value}\n on file {self.file_name}\n")
            logging.info(f"replaced: {key}\nwith: {value}\non file: {self.file_name}\n")
        return self.file_data
# Program start:
print("\n\n\nStarting..\n")

# Create a new log file in the current working directory (overwritten on every run)
log_file_name = "Img_To_Local_Python.log"
logging.basicConfig(filename=log_file_name, encoding='utf-8', filemode="w", level=logging.DEBUG)

# Defines the folder that receives the new markdown files and the downloaded
# images: a folder next to (not inside) the current working directory.
# os.path.join keeps the path valid on every platform.
folder_name = "External_Imgs_to_Local_Files"
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
folder_path = os.path.join(parent_dir, folder_name)

# Create the new folder to receive the downloaded imgs and edited MD files
folder_creator = FolderCreator(parent_dir)
folder_creator.create_folder(folder_name)
logging.info(f"New folder created: {folder_path}\n")
print(f"New folder created: {folder_path}")
logging.info("to receive the imgs and edited markdown files\n")
print("to receive the imgs and edited markdown files\n")

# Regex that will be used to look for the URLs of images.
# Group 1 ("url") is the whole link, group 2 ("end") is the image file name.
regex = r"(?:\(|\[)(?P<url>(?:https?\:(?:\/\/)?)(?:\w|\-|\_|\.|\?|\/)+?\/(?P<end>(?:\w|\-|\_)+\.(?:png|jpg|jpeg|gif|bmp|svg)))(?:\)|\])"

# The helpers and the user agent do not change between files,
# so they are created once, outside the loop
file_opener = FileOpener()
url_dict_creator = UrlDictCreator()
file_data_editor = FileDataEditor()
images_downloader = ImgDownloader()
file_name_writter = FileWritter()
user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1"

# Loop through every markdown file in the current working directory
for filename in os.listdir(os.getcwd()):
    print("\n")
    if not filename.endswith(".md"):
        logging.info(f"Skipped file: {filename}\n")
        print(f"Skipped file: {filename}")
        continue

    # Open and read each file
    file_data = file_opener.open_and_read(filename)
    if file_data is None:
        # The opener already logged the failure; nothing to process here
        continue

    # Create a dictionary of image URLs for each file
    url_dict = url_dict_creator.create(regex, file_data, filename)

    # Edit the read content of each file, replacing the found image URLs with local file names
    edited_file_data = file_data_editor.edit(file_data, url_dict, filename)

    # Download the images listed in the dictionary of found URLs for each file
    images_downloader.download_images(url_dict, folder_path, user_agent)

    # Write the modified markdown file (only when something was replaced)
    if url_dict:
        file_name_writter.write_file(folder_path, filename, edited_file_data)

    print(f"Closed file: {filename}")
    logging.info(f"Closed file: {filename}\n")

print("\n\n\nIf everything went OK, you can check your modified markdown")
print("files and the downloaded images on the folder:")
print(f"{folder_path}")
# Point at the log file that was actually configured above
print(f"\nFor more info check the log file on \n{os.path.join(os.getcwd(), log_file_name)}")
print("\nPress enter to close")
input()