Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a Zenodo download manager #697

Merged
merged 4 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This is specific to this package
powersimdata/utility/.server_user
config.ini
powersimdata/network/europe_tub/data/*
powersimdata/network/europe_tub/data*

# The remainder of this file taken from github/gitignore
# https://github.com/github/gitignore/blob/master/Python.gitignore
Expand Down
1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ pytest = "*"
coverage = "*"
pytest-cov = "*"
pypsa = "*"
zenodo_get = "*"

[packages]
networkx = "~=2.5"
Expand Down
60 changes: 33 additions & 27 deletions powersimdata/network/europe_tub/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import os
import shutil
from zipfile import ZipFile

from powersimdata.network.constants.region.geography import get_geography
from powersimdata.network.constants.region.zones import from_pypsa
Expand All @@ -9,59 +7,67 @@
interconnect_to_name,
)
from powersimdata.network.model import ModelImmutables
from powersimdata.network.zenodo import Zenodo
from powersimdata.utility.helpers import _check_import

pypsa = _check_import("pypsa")
zenodo_get = _check_import("zenodo_get")


class TUB:
"""PyPSA Europe network.

:param str/iterable interconnect: interconnect name(s).
:param str zenodo_record_id: the zenodo record id. If set to None, v0.6.1 will
be used. If set to latest, the latest version will be used.
:param int reduction: reduction parameter (number of nodes in network). If None,
the full network is loaded.
:param bool overwrite: the existing dataset is deleted and a new dataset is
downloaded from zenodo.
"""

def __init__(self, interconnect, reduction=None, overwrite=False):
def __init__(self, interconnect, zenodo_record_id=None, reduction=None):
"""Constructor."""
self.grid_model = "europe_tub"
self.interconnect = check_and_format_interconnect(
interconnect, model=self.grid_model
)
self.data_loc = os.path.join(os.path.dirname(__file__), "data")
self.zenodo_record_id = "3601881"
self.reduction = reduction

if overwrite:
self.remove_data()
if zenodo_record_id is None:
z = Zenodo("7251657")
elif zenodo_record_id == "latest":
z = Zenodo("3601881")
BainanXia marked this conversation as resolved.
Show resolved Hide resolved
else:
z = Zenodo(zenodo_record_id)

self.retrieve_data()
z.load_data(os.path.dirname(__file__))
self.data_loc = os.path.join(z.dir, "networks")
self._set_reduction(reduction)

def remove_data(self):
"""Remove data stored on disk"""
print("Removing PyPSA-Eur dataset")
shutil.rmtree(self.data_loc)
def _set_reduction(self, reduction):
"""Validate and set reduction parameter

def retrieve_data(self):
"""Fetch data"""
zenodo_get.zenodo_get([self.zenodo_record_id, "-o", f"{self.data_loc}"])
with ZipFile(os.path.join(self.data_loc, "networks.zip"), "r") as zip_network:
zip_network.extractall(self.data_loc)
:raises ValueError: if ``reduction`` is not available.
"""
if reduction is None:
self.reduction = None
else:
available = [
s
for f in os.listdir(self.data_loc)
for s in f.split("_")
if s.isdigit()
]
if str(reduction) in available:
self.reduction = reduction
else:
raise ValueError(f"Available reduced network: {' | '.join(available)}")

def build(self):
"""Build network"""
path = os.path.join(self.data_loc, "networks", "elec_s")
path = os.path.join(self.data_loc, "elec_s")
if self.reduction is None:
network = pypsa.Network(path + ".nc")
elif os.path.exists(path + f"_{self.reduction}_ec.nc"):
network = pypsa.Network(path + f"_{self.reduction}_ec.nc")
else:
raise ValueError(
"Invalid Resolution. Choose among: None | 1024 | 512 | 256 | 128 | 37"
)
network = pypsa.Network(path + f"_{self.reduction}_ec.nc")

id2zone = {i: l for i, l in enumerate(network.buses.index)}
zone2id = {l: i for i, l in id2zone.items()}

Expand Down
144 changes: 144 additions & 0 deletions powersimdata/network/zenodo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import hashlib
import json
import os
import shutil
from contextlib import contextmanager
from zipfile import ZipFile

import requests
from tqdm import tqdm

# Base endpoint of the Zenodo records API; a record id is appended to it to
# build the URL that is requested for a given record.
url = "https://zenodo.org/api/records/"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here. Not sure whether we should put it in a const module.



class Zenodo:
    """Get data from a Zenodo archive.

    The record description is fetched when the object is created. The files are
    only downloaded when :meth:`load_data` is called.

    :param str record_id: zenodo record id
    """

    def __init__(self, record_id):
        """Constructor"""
        self.record_id = record_id
        # Directory holding the local copy of the record. It is only known once
        # ``load_data`` has been called with the grid model directory.
        self.dir = None
        self.content = self._get_record_content()

    def _get_record_content(self):
        """Make HTTP request to zenodo API and retrieve content.

        :return: (*dict*) -- content of the response in unicode.
        :raises ConnectionError: if the request times out (while connecting or while
            waiting for the response).
        :raises ValueError: if record is invalid.
        """
        try:
            r = requests.get(url + self.record_id, timeout=10)
        except requests.exceptions.Timeout as e:
            # ``timeout`` applies to both the connect and the read phase, hence the
            # common parent of ConnectTimeout and ReadTimeout is caught here.
            raise ConnectionError("Connection to zenodo.org timed out") from e

        if not r.ok:
            raise ValueError(f"Record could not be accessed. Status: {r.status_code}")

        content = json.loads(r.text)
        metadata = content["metadata"]
        print(f"Title: {metadata['title']}")
        print(f"Publication date: {metadata['publication_date']}")
        print(f"Version: {metadata['version']}")
        print(f"DOI: {metadata['doi']}")

        return content

    def _get_remote_checksum(self, f):
        """Get checksum of the remote copy of a file.

        :param dict f: dictionary containing information on the remote copy of a
            file. Its ``checksum`` entry is formatted as ``<algorithm>:<digest>``.
        :return: (*str*) -- checksum
        """
        return f["checksum"].split(":")[1]

    def _get_local_checksum(self, f):
        """Get checksum of the local copy of a file.

        :param dict f: dictionary containing information on the file. The ``key``
            entry is the file name and the ``checksum`` entry gives the hashing
            algorithm to use.
        :return: (*str*) -- checksum if file exists, *'invalid'* otherwise.
        """
        filename = os.path.join(self.dir, f["key"])
        if not os.path.exists(filename):
            return "invalid"

        h = hashlib.new(f["checksum"].split(":")[0])
        with open(filename, "rb") as file:
            # Hash in chunks so that large archives are never fully held in memory
            for chunk in iter(lambda: file.read(1024 * 1024), b""):
                h.update(chunk)
        return h.hexdigest()

    @contextmanager
    def _change_dir(self):
        """Temporarily make the data directory the current working directory.

        The previous working directory is restored on exit, even if an exception
        is raised in the body of the ``with`` statement.
        """
        work_dir = os.getcwd()
        os.chdir(os.path.expanduser(self.dir))
        try:
            yield
        finally:
            os.chdir(work_dir)

    def _download_data(self, f):
        """Fetch data. The file is written in the current working directory.

        :param dict f: information on the file to download.
        """
        with requests.get(f["links"]["self"], stream=True) as r:
            r.raise_for_status()
            with open(f["key"], "wb") as file:
                with tqdm(
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    miniters=1,
                    total=f["size"],
                ) as pbar:
                    for chunk in r.iter_content(chunk_size=8192):
                        file.write(chunk)
                        pbar.update(len(chunk))

    def _delete_data(self, f):
        """Delete data. Paths are relative to the current working directory.

        Nothing happens for a file (or extracted folder) that is not on disk, so a
        partially populated data directory can be cleaned up safely.

        :param dict f: information on the file to delete.
        """
        try:
            os.remove(f["key"])
        except FileNotFoundError:
            # Deliberate: the file was never (completely) downloaded
            pass
        if f["type"] == "zip":
            # Folder named after the archive, i.e., file name without '.zip'
            extracted = f["key"][:-4]
            if os.path.isdir(extracted):
                shutil.rmtree(extracted)

    def _unzip_data(self, f):
        """Unzip data in the current working directory.

        :param dict f: information on the file to unzip.
        """
        if f["type"] == "zip":
            with ZipFile(f["key"], "r") as file:
                file.extractall()

    def load_data(self, model_dir):
        """Download file(s)

        Files are stored in a *'data_<version>'* folder located in ``model_dir``. A
        file is (re)downloaded only if its local checksum differs from the one
        published on zenodo, which includes the case where the file is missing.

        :param str model_dir: path to directory of the grid model.
        :raises FileNotFoundError: if ``model_dir`` does not exist.
        """
        if not os.path.isdir(model_dir):
            raise FileNotFoundError(f"{model_dir} does not exist")

        version = self.content["metadata"]["version"]
        self.dir = os.path.join(model_dir, f"data_{version}")
        os.makedirs(self.dir, exist_ok=True)

        for f in self.content["files"]:
            if self._get_local_checksum(f) == self._get_remote_checksum(f):
                print(f"{f['key']} has been downloaded previously")
                continue
            with self._change_dir():
                # Remove any stale or partial copy before fetching a fresh one
                self._delete_data(f)
                self._download_data(f)
                self._unzip_data(f)
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ pytest
coverage
pytest-cov
pypsa
zenodo_get