11# -*- coding: utf-8 -*-
2- from __future__ import absolute_import ,unicode_literals
3- from pythainlp .tools import get_path_db ,get_path_data
4- from tinydb import TinyDB ,Query
5- from future .moves .urllib .request import urlopen
6- from tqdm import tqdm
7- import requests
2+
3+ from __future__ import absolute_import , unicode_literals
4+
85import os
6+
97import requests
10- #__all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"]
11- path_db_ = get_path_db ()
8+ from future .moves .urllib .request import urlopen
9+ from pythainlp .tools import get_path_data , get_path_db
10+ from tinydb import Query , TinyDB
11+ from tqdm import tqdm
12+
# Remote JSON index of downloadable corpora; download() looks a corpus name up
# in it to obtain that corpus's version, download URL and file name.
CORPUS_DB_URL = (
    "https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/master/db.json"
)

# __all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"]

# Location of the local TinyDB registry that records installed corpora.
path_db_ = get_path_db()
19+
20+
def get_file(name):
    """
    Return the local path of the corpus file registered under ``name``.

    The local TinyDB registry is searched for ``name``. When a record exists,
    its file name is resolved with ``get_path_data``; if nothing exists at that
    path yet, ``download(name)`` is called before the path is returned.

    @param: name corpus name to look up in the local registry
    @return: path of the corpus file, or None when ``name`` is not registered
    """
    db = TinyDB(path_db_)
    try:
        # Search once and reuse the result instead of querying twice.
        records = db.search(Query().name == name)
    finally:
        # Release the registry on every path, including "not registered".
        db.close()

    if not records:
        return None

    # "file" is the key under which download() stores the corpus file name.
    path = get_path_data(records[0]["file"])
    if not os.path.exists(path):
        download(name)
    return path
30+
31+
2132def download_ (url , dst ):
2233 """
2334 @param: url to download file
2435 @param: dst place to put the file
2536 """
26- file_size = int (urlopen (url ).info ().get (' Content-Length' , - 1 ))
37+ file_size = int (urlopen (url ).info ().get (" Content-Length" , - 1 ))
2738 if os .path .exists (dst ):
2839 first_byte = os .path .getsize (dst )
2940 else :
@@ -32,55 +43,90 @@ def download_(url, dst):
3243 return file_size
3344 header = {"Range" : "bytes=%s-%s" % (first_byte , file_size )}
3445 pbar = tqdm (
35- total = file_size , initial = first_byte ,
36- unit = 'B' , unit_scale = True , desc = url .split ('/' )[- 1 ])
46+ total = file_size ,
47+ initial = first_byte ,
48+ unit = "B" ,
49+ unit_scale = True ,
50+ desc = url .split ("/" )[- 1 ],
51+ )
3752 req = requests .get (url , headers = header , stream = True )
38- with (open (get_path_data (dst ), 'wb' )) as f :
53+ with (open (get_path_data (dst ), "wb" )) as f :
3954 for chunk in req .iter_content (chunk_size = 1024 ):
4055 if chunk :
4156 f .write (chunk )
4257 pbar .update (1024 )
4358 pbar .close ()
44- #return file_size
def download(name, force=False):
    """
    Download the corpus ``name`` and record it in the local registry.

    The remote index at ``CORPUS_DB_URL`` is fetched and ``name`` is looked up
    in it. Names missing from the index are ignored. A corpus that is not yet
    registered locally is downloaded and inserted into the registry. A corpus
    that is already registered is downloaded again (and its stored version
    updated) only after confirmation, unless ``force`` is true.

    @param: name corpus name, as listed in the remote index
    @param: force skip the "y or n" prompt and always re-download
    """
    db = TinyDB(path_db_)
    try:
        data_json = requests.get(CORPUS_DB_URL).json()
        if name not in data_json:
            # Unknown corpus names are silently ignored.
            return

        temp_name = data_json[name]
        print("Download : " + name)

        temp = Query()
        installed = db.search(temp.name == name)
        if not installed:
            # First installation: fetch the file and register it.
            print(name + " " + temp_name["version"])
            download_(temp_name["download"], temp_name["file_name"])
            db.insert(
                {
                    "name": name,
                    "version": temp_name["version"],
                    "file": temp_name["file_name"],
                }
            )
            return

        # Compare versions only among the records of *this* corpus. Combining
        # two TinyDB conditions with Python's ``and`` keeps just the right-hand
        # one, which would match any corpus that happens to share the version.
        same_version = any(
            record.get("version") == temp_name["version"]
            for record in installed
        )
        print("re-download" if same_version else "have update")
        print(
            "from "
            + name
            + " "
            + installed[0]["version"]
            + " update to "
            + name
            + " "
            + temp_name["version"]
        )

        yes_no = "y"
        if not force:
            yes_no = str(input("y or n : ")).lower()
        if yes_no == "y":
            download_(temp_name["download"], temp_name["file_name"])
            db.update({"version": temp_name["version"]}, temp.name == name)
    finally:
        # Always release the registry, even if the request or download fails.
        db.close()
121+
122+
def remove(name):
    """
    Delete the corpus ``name`` from disk and from the local registry.

    @param: name corpus name to remove
    @return: True when ``name`` was registered and has been removed,
             False when it was not registered
    """
    db = TinyDB(path_db_)
    try:
        temp = Query()
        data = db.search(temp.name == name)
        if not data:
            return False

        # Resolve the path straight from the record: going through get_file()
        # would re-download a missing file only for it to be deleted here.
        path = get_path_data(data[0]["file"])
        if os.path.exists(path):
            os.remove(path)
        db.remove(temp.name == name)
        return True
    finally:
        # Release the registry on every path.
        db.close()
0 commit comments