-
Notifications
You must be signed in to change notification settings - Fork 1
/
mlit_ksj_dl.py
117 lines (104 loc) · 3.43 KB
/
mlit_ksj_dl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import glob
import os
import pathlib
import pprint
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.alert import Alert
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import sys
import time
import zipfile
# Relative path of the chromedriver executable handed to Selenium.
DRIVER_PATH = '../mlit-ksj-dl-tool/WebDriver/chromedriver'
# File extension (without the dot) of the shapefiles that get a .cpg file.
EXTENT = "shp"
# Encoding name written into every generated .cpg file.
ENCODING = "Shift_JIS"
def _wait_for_downloads(dl_dir: str, timeout: float = 300.0,
                        poll_interval: float = 0.5) -> bool:
    """Wait until no partial Chrome download remains in ``dl_dir``.

    Chrome stores an in-progress download under a temporary
    ``*.crdownload`` name, so the absence of such files is used as the
    "all downloads finished" signal.

    Parameters
    -----
    dl_dir: str
        directory Chrome downloads into
    timeout: float
        maximum number of seconds to wait
    poll_interval: float
        seconds to sleep between two directory checks
    Returns
    -----
    finished: bool
        True when no partial file is left, False when ``timeout`` expired
    """
    pattern = os.path.join(dl_dir, "*.crdownload")
    deadline = time.monotonic() + timeout
    while glob.glob(pattern):
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll_interval)
    return True


def file_dl(driver_path: str) -> str:
    """Download the zip files of an MLIT KSJ page by browser automation.

    The page URL is taken from the first command-line argument
    (``sys.argv[1]``). Every element matching ``#menu-button`` is clicked,
    the dialog that follows is accepted, and the browser is closed once the
    downloads have finished.

    Parameters
    -----
    driver_path: str
        path to WebDriver
    Returns
    -----
    dl_dir: str
        directory path to the files downloaded
    Raises
    -----
    SystemExit
        when no URL was given on the command line
    """
    # Validate the command line first so that a missing URL neither creates
    # the download directory nor leaves a browser process behind.
    if len(sys.argv) < 2:
        sys.exit("usage: mlit_ksj_dl.py <URL of the KSJ download page>")
    url = sys.argv[1]

    dldir_path = pathlib.Path(os.getcwd(), 'download')
    dldir_path.mkdir(exist_ok=True)
    dl_dir = str(dldir_path.resolve())

    # Chrome options to boot Selenium in any environment
    options = Options()
    options.add_argument('--disable-gpu')
    options.add_argument('--disable-extensions')
    options.add_argument('--proxy-server="direct://"')
    options.add_argument('--proxy-bypass-list=*')
    options.add_argument('--start-maximized')
    options.add_experimental_option("prefs", {
        "download.default_directory": dl_dir})
    # Uncomment the next line to run Chrome in headless mode.
    # options.add_argument('--headless')

    # Open browser with the driver the caller asked for.
    # NOTE(review): recent Selenium 4 releases expect a Service object instead
    # of executable_path - confirm against the Selenium version in use.
    driver = webdriver.Chrome(executable_path=driver_path, options=options)
    try:
        # Open the download page of MLIT KSJ
        driver.get(url)
        elements = driver.find_elements(By.CSS_SELECTOR, '#menu-button')
        print(str(len(elements)) + " zip file is going to be DL")
        for i, e in enumerate(elements):
            print(i)
            e.click()
            # The page is expected to raise a dialog after each click; wait
            # for it instead of relying on a fixed sleep, then accept it.
            WebDriverWait(driver, 10).until(EC.alert_is_present()).accept()
            # Give Chrome a moment to start the download before moving on.
            time.sleep(2)
        # Closing the browser aborts running downloads, so wait for them.
        if not _wait_for_downloads(dl_dir):
            print("WARNING: some downloads did not finish in time")
    finally:
        # Always release the browser and chromedriver processes.
        driver.quit()
    return dl_dir
def extraction(dl_dir: str) -> str:
    """Unpack every zip archive found in ``dl_dir`` into a ``shp`` folder.

    The ``shp`` folder is created under the current working directory when
    it does not exist yet. The target path and the list of archives are
    printed before extraction.

    Parameters
    -----
    dl_dir: str
        directory that holds the ``*.zip`` archives
    Returns
    -----
    ext_dir: str
        resolved path of the folder the archives were extracted into
    """
    target = pathlib.Path(os.getcwd()) / 'shp'  # save folder
    target.mkdir(exist_ok=True)
    ext_dir = str(target.resolve())
    print(ext_dir)

    archives = glob.glob("{}/*.zip".format(dl_dir))
    print(archives)
    for archive_path in archives:
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(path=ext_dir)
    return ext_dir
def create_cpg(extent: str, encoding: str, ext_dir: str) -> None:
    """Create a .cpg file next to every matching file in ``ext_dir``.

    For each ``<name>.<extent>`` file directly inside ``ext_dir`` a
    ``<name>.cpg`` file containing ``encoding`` is written to the same
    directory. The list of matched files is pretty-printed first.

    Parameters
    -----
    extent: str
        extension (without the dot) of the files to look for
    encoding: str
        encoding name written into each .cpg file
    ext_dir: str
        absolute path of the directory holding the files
    """
    # Use the arguments rather than the module-level defaults so that
    # callers can request another extension or encoding.
    files = glob.glob(ext_dir + "/*." + extent)
    pprint.pprint(files)
    for f in files:
        basename = os.path.splitext(os.path.basename(f))[0]
        path = os.path.join(ext_dir, basename + ".cpg")
        # The context manager closes the file even when the write fails.
        with open(path, 'w') as cpg:
            cpg.write(encoding)
# Script entry point: download the zip archives, unpack them, then write a
# .cpg file next to every extracted shapefile.
if __name__ == "__main__":
    dl_dir = file_dl(DRIVER_PATH)
    ext_dir = extraction(dl_dir)
    create_cpg(EXTENT, ENCODING, ext_dir)