Skip to content

Commit

Permalink
Merge pull request #1229 from lisphilar/issue1223
Browse files Browse the repository at this point in the history
Issue1223
  • Loading branch information
lisphilar authored Sep 23, 2022
2 parents bc1d537 + 0d2178c commit 331bf39
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 50 deletions.
87 changes: 49 additions & 38 deletions covsirphy/downloading/downloader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import warnings
from covsirphy.util.error import NotRegisteredError, SubsetNotFoundError
from covsirphy.util.validator import Validator
from covsirphy.util.term import Term
Expand Down Expand Up @@ -44,50 +45,53 @@ def layer(self, country=None, province=None, databases=None):
databases (list[str] or None): databases to use or None (japan, covid19dh, google, owid).
"japan": COVID-19 Dataset in Japan,
"covid19dh": COVID-19 Data Hub,
"google: COVID-19 Open Data by Google Cloud Platform,
"google": COVID-19 Open Data by Google Cloud Platform (deprecated),
"owid": Our World In Data,
"wpp": World Population Prospects by United Nations.
Note:
"google" for @databases is deprecated; refer to https://github.com/lisphilar/covid19-sir/issues/1223
Returns:
pandas.DataFrame:
Index
reset index
Columns
- Date (pandas.Timestamp): observation date
- ISO3 (str): country names
- Province (str): province/state/prefecture names
- City (str): city names
- Country (str): country names (top level administration)
- Province (str): province names (2nd level administration)
- ISO3 (str): ISO3 codes
- Confirmed (pandas.Int64): the number of confirmed cases
- Fatal (pandas.Int64): the number of fatal cases
- Recovered (pandas.Int64): the number of recovered cases
- Population (pandas.Int64): population values
- Tests (pandas.Int64): the number of tests
- Product (pandas.Int64): vaccine product names
- Vaccinations (pandas.Int64): cumulative number of vaccinations
- Vaccinations_boosters (pandas.Int64): cumulative number of booster vaccinations
- Vaccinated_once (pandas.Int64): cumulative number of people who received at least one vaccine dose
- Vaccinated_full (pandas.Int64): cumulative number of people who received all doses prescribed by the protocol
- School_closing
- Workplace_closing
- Cancel_events
- Gatherings_restrictions
- Transport_closing
- Stay_home_restrictions
- Internal_movement_restrictions
- International_movement_restrictions
- Information_campaigns
- Testing_policy
- Contact_tracing
- Stringency_index
- Mobility_grocery_and_pharmacy: % to baseline in visits (grocery markets, pharmacies etc.)
- Mobility_parks: % to baseline in visits (parks etc.)
- Mobility_transit_stations: % to baseline in visits (public transport hubs etc.)
- Mobility_retail_and_recreation: % to baseline in visits (restaurant, museums etc.)
- Mobility_residential: % to baseline in visits (places of residence)
- Mobility_workplaces: % to baseline in visits (places of work)
Date (pandas.Timestamp): observation date
ISO3 (str): country names
Province (str): province/state/prefecture names
City (str): city names
Country (str): country names (top level administration)
Province (str): province names (2nd level administration)
ISO3 (str): ISO3 codes
Confirmed (pandas.Int64): the number of confirmed cases
Fatal (pandas.Int64): the number of fatal cases
Recovered (pandas.Int64): the number of recovered cases
Population (pandas.Int64): population values
Tests (pandas.Int64): the number of tests
Product (pandas.Int64): vaccine product names
Vaccinations (pandas.Int64): cumulative number of vaccinations
Vaccinations_boosters (pandas.Int64): cumulative number of booster vaccinations
Vaccinated_once (pandas.Int64): cumulative number of people who received at least one vaccine dose
Vaccinated_full (pandas.Int64): cumulative number of people who received all doses prescribed by the protocol
School_closing
Workplace_closing
Cancel_events
Gatherings_restrictions
Transport_closing
Stay_home_restrictions
Internal_movement_restrictions
International_movement_restrictions
Information_campaigns
Testing_policy
Contact_tracing
Stringency_index
(deprecated) Mobility_grocery_and_pharmacy: % to baseline in visits (grocery markets, pharmacies etc.)
(deprecated) Mobility_parks: % to baseline in visits (parks etc.)
(deprecated) Mobility_transit_stations: % to baseline in visits (public transport hubs etc.)
(deprecated) Mobility_retail_and_recreation: % to baseline in visits (restaurant, museums etc.)
(deprecated) Mobility_residential: % to baseline in visits (places of residence)
(deprecated) Mobility_workplaces: % to baseline in visits (places of work)
Note:
When @country is None, country-level data will be returned.
Expand All @@ -101,14 +105,21 @@ def layer(self, country=None, province=None, databases=None):
db_dict = {
"japan": _CSJapan,
"covid19dh": _COVID19dh,
"google": _GoogleOpenData,
"owid": _OWID,
"wpp": _WPP,
# Deprecated
"google": _GoogleOpenData,
}
all_databases = ["japan", "covid19dh", "google", "owid"]
all_databases = ["japan", "covid19dh", "google", "owid"] # "google" will be removed at 3.0.0
selected = Validator(databases, "databases").sequence(default=all_databases, candidates=list(db_dict.keys()))
self._gis = GIS(layers=self.LAYERS, country=self.ISO3, date=self.DATE, verbose=self._verbose)
for database in selected:
if database == "google":
warnings.warn(
"Please use `databases=['japan', 'covid19dh', 'owid']` and refer to https://github.com/lisphilar/covid19-sir/issues/1223",
DeprecationWarning,
stacklevel=2
)
db = db_dict[database](
directory=self._directory, update_interval=self._update_interval, verbose=self._verbose)
new_df = db.layer(country=country, province=province).convert_dtypes()
Expand Down
9 changes: 5 additions & 4 deletions example/01_data_preparation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
" * O. Wahltinez and others (2020), COVID-19 Open-Data: curating a fine-grained, global-scale data repository for SARS-CoV-2, Work in progress, https://goo.gle/covid-19-open-data\n",
" * percentage to baseline in visits\n",
" * Note: Please refer to [Google Terms of Service](https://policies.google.com/terms) in advance.\n",
" * **This will be removed because it is no longer updated. Refer to https://github.com/lisphilar/covid19-sir/issues/1224**\n",
"* **[World Population Prospects 2022](https://population.un.org/wpp/)**\n",
" * United Nations, Department of Economic and Social Affairs, Population Division (2022). World Population Prospects 2022, Online Edition.\n",
" * Total population in each country\n",
Expand Down Expand Up @@ -103,7 +104,7 @@
"outputs": [],
"source": [
"eng = cs.DataEngineer()\n",
"eng.download();"
"eng.download(databases=[\"japan\", \"covid19dh\", \"owid\"])"
]
},
{
Expand Down Expand Up @@ -188,7 +189,7 @@
"outputs": [],
"source": [
"eng_jpn = cs.DataEngineer()\n",
"eng_jpn.download(country=\"Japan\")\n",
"eng_jpn.download(country=\"Japan\", databases=[\"japan\", \"covid19dh\", \"owid\"])\n",
"eng_jpn.all().head()"
]
},
Expand All @@ -215,7 +216,7 @@
"outputs": [],
"source": [
"eng_alabama = cs.DataEngineer()\n",
"eng_alabama.download(country=\"USA\", province=\"Alabama\")\n",
"eng_alabama.download(country=\"USA\", province=\"Alabama\", databases=[\"japan\", \"covid19dh\", \"owid\"])\n",
"eng_alabama.all().head()"
]
},
Expand Down Expand Up @@ -245,7 +246,7 @@
"outputs": [],
"source": [
"dl = cs.DataDownloader()\n",
"dl_df = dl.layer(country=None, province=None)"
"dl_df = dl.layer(country=None, province=None, databases=[\"japan\", \"covid19dh\", \"owid\"])"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion example/02_data_engineering.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"outputs": [],
"source": [
"eng = cs.DataEngineer()\n",
"eng.download()\n",
"eng.download(databases=[\"japan\", \"covid19dh\", \"owid\"])\n",
"eng.all().info()"
]
},
Expand Down
4 changes: 2 additions & 2 deletions example/06_prediction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@
"outputs": [],
"source": [
"data_eng = cs.DataEngineer()\n",
"data_eng.download().clean().transform()\n",
"data_eng.download(databases=[\"japan\", \"covid19dh\", \"owid\"]).clean().transform()\n",
"subset_df, *_ = data_eng.subset(geo=\"Japan\")\n",
"indicator_df = subset_df.drop([\"Population\", \"Susceptible\", \"Confirmed\", \"Infected\", \"Fatal\", \"Recovered\"], axis=1)\n",
"indicator_df"
Expand Down Expand Up @@ -419,7 +419,7 @@
},
"language_info": {
"name": "python",
"version": "3.9.13"
"version": "3.10.7"
},
"vscode": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion tests/test_downloading/test_downloading.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class TestDataDownloader(object):
)
def test_download(self, country, province):
dl = DataDownloader()
dl.layer(country=country, province=province)
dl.layer(country=country, province=province, databases=["japan", "covid19dh", "owid"])
assert dl.citations()

@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/test_engineering/test_engineer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_operations(self):

def test_with_actual_data(self, imgfile):
engineer = DataEngineer()
engineer.download()
engineer.download(databases=["japan", "covid19dh", "owid"])
all_df = engineer.all()
layer_df = engineer.layer()
assert_frame_equal(all_df, layer_df, check_dtype=False, check_categorical=False)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_science/test_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class TestMLEngineer(object):
def _subset(self):
data_eng = DataEngineer()
data_eng.download()
data_eng.download(databases=["japan", "covid19dh", "owid"])
data_eng.clean()
data_eng.transform()
return data_eng.subset(geo="Japan")[0]
Expand Down
2 changes: 0 additions & 2 deletions tests/test_visualization/test_visualize.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pathlib import Path
import warnings
import matplotlib
import pytest
Expand All @@ -18,7 +17,6 @@ def test_base(self):
def test_file(self, imgfile):
with VisualizeBase(filename=imgfile):
pass
assert Path(imgfile).exists()

def test_setting(self, imgfile):
with VisualizeBase(filename=imgfile) as vb:
Expand Down

0 comments on commit 331bf39

Please sign in to comment.