Skip to content

Commit

Permalink
refine plate_model.py
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelchin committed Jul 26, 2023
1 parent eea1a30 commit 0032cef
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 78 deletions.
2 changes: 1 addition & 1 deletion gplately/network_aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ async def _fetch_file(
if r.status == 304:
print(url)
print(
"The file has not been changed since it was downloaded last time.Do nothing and return."
"The file has not been changed since it was downloaded last time. Do nothing and return."
)
elif r.status == 200:
if auto_unzip and url.endswith(".zip"):
Expand Down
2 changes: 1 addition & 1 deletion gplately/network_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def fetch_file(
if r.status_code == 304:
print(url)
print(
"The file has not been changed since it was downloaded last time.Do nothing and return."
"The file has not been changed since it was downloaded last time. Do nothing and return."
)
elif r.status_code == 200:
if auto_unzip and url.endswith(".zip"):
Expand Down
155 changes: 85 additions & 70 deletions gplately/plate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@ def __init__(self, model_name, data_dir=None, force_fresh=False):
models_file = f"{self.data_dir}/models.json"
models = None

# async and concurrent things
self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=15)
self.loop = asyncio.new_event_loop()
self.run = functools.partial(self.loop.run_in_executor, self.executor)
asyncio.set_event_loop(self.loop)

if force_fresh:
# force refresh models.json
if os.path.isfile(models_file) and force_fresh:
os.remove(models_file)

# check the local models cfg first
# check the local models cfg first. if not too old, use it
if os.path.isfile(models_file):
if (time.time() - os.path.getmtime(models_file)) < 6 * 60 * 60: # 6 hours
with open(models_file) as f:
Expand Down Expand Up @@ -72,6 +74,9 @@ def __init__(self, model_name, data_dir=None, force_fresh=False):
def __del__(self):
    """release the concurrency resources owned by this object

    Shuts down ``self.executor`` (without waiting) and closes ``self.loop``.
    Both attributes are looked up defensively: ``__del__`` is also invoked on
    an instance whose ``__init__`` raised before they were assigned, and an
    AttributeError here would only surface as an "Exception ignored" warning.
    """
    executor = getattr(self, "executor", None)
    if executor is not None:
        # wait=False: a finalizer must never block on outstanding work
        executor.shutdown(wait=False)

    loop = getattr(self, "loop", None)
    if loop is not None:
        loop.close()

def get_data_dir(self):
"""return the path of the data folder (self.data_dir) used by this plate model"""
return self.data_dir

def get_avail_layers(self):
"""get all available layers in this plate model"""
if not self.model:
Expand Down Expand Up @@ -107,7 +112,11 @@ def get_COBs(self):
return self.get_layer("COBs")

def get_layer(self, layer_name):
"""get a layer by name"""
"""get a layer by name
:param layer_name: layer name
:returns: pygplates.FeatureCollection
"""
file_extensions = [
"gpml",
"gpmlz",
Expand All @@ -133,7 +142,8 @@ def get_layer(self, layer_name):
return fc

def download_layer_files(self, layer_name, dst_path=None, force=False):
"""given the layer name, download the layer files
"""given the layer name, download the layer files.
The layer files are in a .zip file. download and unzip it.
:param layer_name: such as "Rotations","Coastlines", "StaticPolygons", "ContinentalPolygons", "Topologies", etc
:param force: delete the local files and download again
Expand All @@ -143,8 +153,9 @@ def download_layer_files(self, layer_name, dst_path=None, force=False):
"""
print(f"downloading {layer_name}")
download_flag = False
download_with_etag = False
meta_etag = None

# find layer file url. two parts. one is the rotation, the other is all other geometry layers
if layer_name in self.model:
layer_file_url = self.model[layer_name]
elif "Layers" in self.model and layer_name in self.model["Layers"]:
Expand All @@ -171,66 +182,41 @@ def download_layer_files(self, layer_name, dst_path=None, force=False):
if os.path.isdir(layer_folder) and force:
shutil.rmtree(layer_folder)

# first check if the "Rotations" folder exists
# first check if the layer folder exists
if os.path.isdir(layer_folder):
metadata_file = f"{layer_folder}/{self.meta_filename}"
if os.path.isfile(metadata_file):
with open(metadata_file, "r") as f:
meta = json.load(f)
if "url" in meta:
meta_url = meta["url"]
if meta_url != layer_file_url:
# if the data url has changed, re-download
download_flag = True
if "expiry" in meta:
meta_expiry = meta["expiry"]
expiry_date = datetime.strptime(meta_expiry, self.expiry_format)
now = datetime.now()
if now > expiry_date:
download_with_etag = True
# now.strftime("%m/%d/%Y, %H:%M:%S")
if "etag" in meta:
meta_etag = meta["etag"]
else:
# if the metadata.json does not exist
download_flag = True

download_flag, meta_etag = self._check_redownload_need(
metadata_file, layer_file_url
)
else:
# if the "Rotations" folder does not exist
# if the layer folder does not exist
download_flag = True

if not download_flag and not download_with_etag:
if not download_flag:
print("The local files are still good. Will not download again.")
else:
new_etag = None
if download_flag:
new_etag = network_requests.fetch_file(
layer_file_url,
model_folder,
auto_unzip=True,
)
elif download_with_etag and meta_etag:
new_etag = network_requests.fetch_file(
layer_file_url,
model_folder,
etag=meta_etag,
auto_unzip=True,
)
new_etag = network_requests.fetch_file(
layer_file_url,
model_folder,
etag=meta_etag,
auto_unzip=True,
)

# save metadata
metadata = {
"layer_name": layer_name,
"url": layer_file_url,
"expiry": (now + timedelta(hours=12)).strftime(self.expiry_format),
"etag": new_etag,
}
with open(f"{layer_folder}/{self.meta_filename}", "w+") as f:
json.dump(metadata, f)
if new_etag != meta_etag:
# save metadata
metadata = {
"layer_name": layer_name,
"url": layer_file_url,
"expiry": (now + timedelta(hours=12)).strftime(self.expiry_format),
"etag": new_etag,
}
with open(f"{layer_folder}/{self.meta_filename}", "w+") as f:
json.dump(metadata, f)

return layer_folder

def download(self, dst_path=None, force=False):
"""download all layers"""
def download_all_layers(self, dst_path=None, force=False):
"""download all layers. Call download_layer_files() on every layer"""

async def f():
tasks = []
Expand All @@ -250,7 +236,13 @@ async def f():
self.loop.run_until_complete(f())

def _check_redownload_need(self, metadata_file, url):
"""check the metadata file and decide if redownload is necessary"""
"""check the metadata file and decide if redownload is necessary
:param metadata_file: metadata file path
:param url: url for the target file
:returns download_flag, etag: a flag indicating whether a redownload is necessary, and the old etag if needed.
"""
download_flag = False
meta_etag = None
if os.path.isfile(metadata_file):
Expand Down Expand Up @@ -287,16 +279,34 @@ def _check_redownload_need(self, metadata_file, url):

return download_flag, meta_etag

def download_time_dependent_rasters(self, raster_name, dst_path):
""""""
def get_avail_time_dependent_raster_names(self):
    """return the names of all time dependent rasters which have been configured in this model.

    :returns: a list of the keys found under ``self.model["TimeDepRasters"]``;
        an empty list if the model has no "TimeDepRasters" entry.
    """
    if "TimeDepRasters" not in self.model:
        return []
    # iterating the mapping yields its keys, i.e. the raster names
    return list(self.model["TimeDepRasters"])

def download_time_dependent_rasters(self, raster_name, dst_path=None, times=None):
"""download time dependent rasters, such agegrids
:param raster_name: raster name, such as AgeGrids. see the models.json
:param dst_path: where to save the files
:param times: if not given, download from begin to end with 1My interval
"""
if (
"TimeDepRasters" in self.model
and raster_name in self.model["TimeDepRasters"]
):

async def f():
nonlocal times
nonlocal dst_path
tasks = []
for time in range(self.model["SmallTime"], self.model["BigTime"]):
if not dst_path:
dst_path = f"{self.get_data_dir()}/{self.model_name}/{raster_name}"
if not times:
times = range(self.model["SmallTime"], self.model["BigTime"])
for time in times:
tasks.append(
self.run(
self.download_raster,
Expand All @@ -316,27 +326,32 @@ async def f():
)

def download_raster(self, url, dst_path):
""""""
"""download a single raster file from "url" and save the file in "dst_path"
a metadata file will also be created for the raster file in folder f"{dst_path}/metadata"
"""
print(f"downloading {url}")
filename = url.split("/")[-1]
metadata_folder = f"{dst_path}/metadata"
metadata_file = f"{metadata_folder}/{filename}.json"
download_flag, etag = self._check_redownload_need(metadata_file, url)
# only redownload when necessary
if download_flag:
new_etag = network_requests.fetch_file(
url,
dst_path,
etag=etag,
auto_unzip=True,
)
# save metadata
metadata = {
"url": url,
"expiry": (datetime.now() + timedelta(hours=12)).strftime(
self.expiry_format
),
"etag": new_etag,
}
Path(metadata_folder).mkdir(parents=True, exist_ok=True)
with open(metadata_file, "w+") as f:
json.dump(metadata, f)
if etag != new_etag:
# save metadata file
metadata = {
"url": url,
"expiry": (datetime.now() + timedelta(hours=12)).strftime(
self.expiry_format
),
"etag": new_etag,
}
Path(metadata_folder).mkdir(parents=True, exist_ok=True)
with open(metadata_file, "w+") as f:
json.dump(metadata, f)
14 changes: 8 additions & 6 deletions unittest/test_plate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
sys.path.insert(0, "../")
from gplately import plate_model

model = plate_model.PlateModel("Muller2019")
model = plate_model.PlateModel("Muller2019", data_dir="test-plate-model-folder")

# print(model.get_avail_layers())
print(model.get_avail_layers())

# print(model.get_rotation_model())
print(model.get_rotation_model())

# print(model.get_layer("Coastlines"))
print(model.get_layer("Coastlines"))

model.download(dst_path="test-download-folder")
model.download_all_layers()

# model.download_time_dependent_rasters("AgeGrids", "test-age-agrids-download")
model.download_time_dependent_rasters("AgeGrids", times=[1, 2])

print(model.get_data_dir())

0 comments on commit 0032cef

Please sign in to comment.