@@ -88,6 +88,20 @@ def get_corpus(filename: str) -> frozenset:
8888 return frozenset (lines )
8989
9090
def _update_all():
    """
    Migrate legacy records in the local corpus catalog to the "filename" key.

    Every record in the TinyDB catalog at ``corpus_db_path()`` is inspected.
    If a record carries the legacy "file_name" key (or, failing that, the
    legacy "file" key), its value is copied into a "filename" field on all
    records whose "name" equals that record's name. Legacy keys are left in
    place; records with neither legacy key are not touched.

    :raises KeyError: if a record that needs migrating has no "name" field.
    """
    print("Update Corpus...")
    # The ``with`` block closes the database on exit, so no explicit
    # ``close()`` call is needed inside it.
    with TinyDB(corpus_db_path()) as local_db:
        query = Query()
        for item in local_db.all():
            # "file_name" takes precedence over "file" when both are present.
            for legacy_key in ("file_name", "file"):
                if legacy_key in item:
                    # NOTE(review): the update is matched by name, so records
                    # sharing a name all receive this value - presumably
                    # names are unique in the catalog; confirm.
                    local_db.update(
                        {"filename": item[legacy_key]},
                        query.name == item["name"],
                    )
                    break
103+
104+
91105def get_corpus_path (name : str ) -> Union [str , None ]:
92106 """
93107 Get corpus path.
@@ -125,13 +139,18 @@ def get_corpus_path(name: str) -> Union[str, None]:
125139 """
126140 # check if the corpus is in local catalog, download if not
127141 corpus_db_detail = get_corpus_db_detail (name )
128- if not corpus_db_detail or not corpus_db_detail .get ("file_name" ):
142+ if corpus_db_detail .get ("file_name" ) is not None and corpus_db_detail .get ("filename" ) is None :
143+ _update_all ()
144+ elif corpus_db_detail .get ("file" ) is not None and corpus_db_detail .get ("filename" ) is None :
145+ _update_all ()
146+
147+ if not corpus_db_detail or not corpus_db_detail .get ("filename" ):
129148 download (name )
130149 corpus_db_detail = get_corpus_db_detail (name )
131150
132- if corpus_db_detail and corpus_db_detail .get ("file_name " ):
151+ if corpus_db_detail and corpus_db_detail .get ("filename " ):
133152 # corpus is in the local catalog, get full path to the file
134- path = get_full_data_path (corpus_db_detail .get ("file_name " ))
153+ path = get_full_data_path (corpus_db_detail .get ("filename " ))
135154 # check if the corpus file actually exists, download if not
136155 if not os .path .exists (path ):
137156 download (name )
@@ -263,7 +282,7 @@ def download(name: str, force: bool = False, url: str = None, version: str = Non
263282 {
264283 "name" : name ,
265284 "version" : version ,
266- "file_name " : file_name ,
285+ "filename " : file_name ,
267286 }
268287 )
269288 else :
0 commit comments