From 0fea859751df4a377ebae21089632c41e61bb424 Mon Sep 17 00:00:00 2001 From: Lisphilar <7270139+lisphilar@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:05:01 +0900 Subject: [PATCH 1/5] deprecate databases=["google"], #1223 --- covsirphy/downloading/downloader.py | 87 ++++++++++++++++------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/covsirphy/downloading/downloader.py b/covsirphy/downloading/downloader.py index 2178e3d8d..e86928fee 100644 --- a/covsirphy/downloading/downloader.py +++ b/covsirphy/downloading/downloader.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import warnings from covsirphy.util.error import NotRegisteredError, SubsetNotFoundError from covsirphy.util.validator import Validator from covsirphy.util.term import Term @@ -44,50 +45,53 @@ def layer(self, country=None, province=None, databases=None): databases (list[str] or None): databases to use or None (japan, covid19dh, google, owid). "japan": COVID-19 Dataset in Japan, "covid19dh": COVID-19 Data Hub, - "google: COVID-19 Open Data by Google Cloud Platform, + "google": COVID-19 Open Data by Google Cloud Platform (deprecated), "owid": Our World In Data, "wpp": World Population Prospects by United nations. 
+ Note: + "google" for @databases is deprecated; refer to https://github.com/lisphilar/covid19-sir/issues/1223 + Returns: pandas.DataFrame: Index reset index Columns - - Date (pandas.Timestamp): observation date - - ISO3 (str): country names - - Province (str): province/state/prefecture names - - City (str): city names - - Country (str): country names (top level administration) - - Province (str): province names (2nd level administration) - - ISO3 (str): ISO3 codes - - Confirmed (pandas.Int64): the number of confirmed cases - - Fatal (pandas.Int64): the number of fatal cases - - Recovered (pandas.Int64): the number of recovered cases - - Population (pandas.Int64): population values - - Tests (pandas.Int64): the number of tests - - Product (pandas.Int64): vaccine product names - - Vaccinations (pandas.Int64): cumulative number of vaccinations - - Vaccinations_boosters (pandas.Int64): cumulative number of booster vaccinations - - Vaccinated_once (pandas.Int64): cumulative number of people who received at least one vaccine dose - - Vaccinated_full (pandas.Int64): cumulative number of people who received all doses prescribed by the protocol - - School_closing - - Workplace_closing - - Cancel_events - - Gatherings_restrictions - - Transport_closing - - Stay_home_restrictions - - Internal_movement_restrictions - - International_movement_restrictions - - Information_campaigns - - Testing_policy - - Contact_tracing - - Stringency_index - - Mobility_grocery_and_pharmacy: % to baseline in visits (grocery markets, pharmacies etc.) - - Mobility_parks: % to baseline in visits (parks etc.) - - Mobility_transit_stations: % to baseline in visits (public transport hubs etc.) - - Mobility_retail_and_recreation: % to baseline in visits (restaurant, museums etc.) 
- - Mobility_residential: % to baseline in visits (places of residence) - - Mobility_workplaces: % to baseline in visits (places of work) + Date (pandas.Timestamp): observation date + ISO3 (str): country names + Province (str): province/state/prefecture names + City (str): city names + Country (str): country names (top level administration) + Province (str): province names (2nd level administration) + ISO3 (str): ISO3 codes + Confirmed (pandas.Int64): the number of confirmed cases + Fatal (pandas.Int64): the number of fatal cases + Recovered (pandas.Int64): the number of recovered cases + Population (pandas.Int64): population values + Tests (pandas.Int64): the number of tests + Product (pandas.Int64): vaccine product names + Vaccinations (pandas.Int64): cumulative number of vaccinations + Vaccinations_boosters (pandas.Int64): cumulative number of booster vaccinations + Vaccinated_once (pandas.Int64): cumulative number of people who received at least one vaccine dose + Vaccinated_full (pandas.Int64): cumulative number of people who received all doses prescribed by the protocol + School_closing + Workplace_closing + Cancel_events + Gatherings_restrictions + Transport_closing + Stay_home_restrictions + Internal_movement_restrictions + International_movement_restrictions + Information_campaigns + Testing_policy + Contact_tracing + Stringency_index + (deprecated) Mobility_grocery_and_pharmacy: % to baseline in visits (grocery markets, pharmacies etc.) + (deprecated) Mobility_parks: % to baseline in visits (parks etc.) + (deprecated) Mobility_transit_stations: % to baseline in visits (public transport hubs etc.) + (deprecated) Mobility_retail_and_recreation: % to baseline in visits (restaurant, museums etc.) + (deprecated) Mobility_residential: % to baseline in visits (places of residence) + (deprecated) Mobility_workplaces: % to baseline in visits (places of work) Note: When @country is None, country-level data will be returned. 
@@ -101,14 +105,21 @@ def layer(self, country=None, province=None, databases=None): db_dict = { "japan": _CSJapan, "covid19dh": _COVID19dh, - "google": _GoogleOpenData, "owid": _OWID, "wpp": _WPP, + # Deprecated + "google": _GoogleOpenData, } - all_databases = ["japan", "covid19dh", "google", "owid"] + all_databases = ["japan", "covid19dh", "google", "owid"] # "google" will be removed at 3.0.0 selected = Validator(databases, "databases").sequence(default=all_databases, candidates=list(db_dict.keys())) self._gis = GIS(layers=self.LAYERS, country=self.ISO3, date=self.DATE, verbose=self._verbose) for database in selected: + if database == "google": + warnings.warn( + "Please use `databases=['japan', 'covid19dh', 'owid']` and refer to https://github.com/lisphilar/covid19-sir/issues/1223", + DeprecationWarning, + stacklevel=2 + ) db = db_dict[database]( directory=self._directory, update_interval=self._update_interval, verbose=self._verbose) new_df = db.layer(country=country, province=province).convert_dtypes() From 0725ead6331527ed3d326b2390a2be7ad182b395 Mon Sep 17 00:00:00 2001 From: Lisphilar <7270139+lisphilar@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:05:11 +0900 Subject: [PATCH 2/5] test --- tests/test_downloading/test_downloading.py | 2 +- tests/test_engineering/test_engineer.py | 2 +- tests/test_science/test_ml.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_downloading/test_downloading.py b/tests/test_downloading/test_downloading.py index e5c391f3f..872812ed0 100644 --- a/tests/test_downloading/test_downloading.py +++ b/tests/test_downloading/test_downloading.py @@ -18,7 +18,7 @@ class TestDataDownloader(object): ) def test_download(self, country, province): dl = DataDownloader() - dl.layer(country=country, province=province) + dl.layer(country=country, province=province, databases=["japan", "covid19dh", "owid"]) assert dl.citations() @pytest.mark.parametrize( diff --git a/tests/test_engineering/test_engineer.py 
b/tests/test_engineering/test_engineer.py index 90b6d3591..bd5a87a9e 100644 --- a/tests/test_engineering/test_engineer.py +++ b/tests/test_engineering/test_engineer.py @@ -48,7 +48,7 @@ def test_operations(self): def test_with_actual_data(self, imgfile): engineer = DataEngineer() - engineer.download() + engineer.download(databases=["japan", "covid19dh", "owid"]) all_df = engineer.all() layer_df = engineer.layer() assert_frame_equal(all_df, layer_df, check_dtype=False, check_categorical=False) diff --git a/tests/test_science/test_ml.py b/tests/test_science/test_ml.py index 80a412848..9ef12dad3 100644 --- a/tests/test_science/test_ml.py +++ b/tests/test_science/test_ml.py @@ -7,7 +7,7 @@ class TestMLEngineer(object): def _subset(self): data_eng = DataEngineer() - data_eng.download() + data_eng.download(databases=["japan", "covid19dh", "owid"]) data_eng.clean() data_eng.transform() return data_eng.subset(geo="Japan")[0] From 922d89d58fd4b0ee19ce4177ffe9cf1b50c46439 Mon Sep 17 00:00:00 2001 From: Lisphilar <7270139+lisphilar@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:05:16 +0900 Subject: [PATCH 3/5] docs --- example/01_data_preparation.ipynb | 9 +++++---- example/02_data_engineering.ipynb | 2 +- example/06_prediction.ipynb | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/example/01_data_preparation.ipynb b/example/01_data_preparation.ipynb index a334cd29b..bc556bd09 100644 --- a/example/01_data_preparation.ipynb +++ b/example/01_data_preparation.ipynb @@ -66,6 +66,7 @@ " * O. Wahltinez and others (2020), COVID-19 Open-Data: curating a fine-grained, global-scale data repository for SARS-CoV-2, Work in progress, https://goo.gle/covid-19-open-data\n", " * percentage to baseline in visits\n", " * Note: Please refer to [Google Terms of Service](https://policies.google.com/terms) in advance.\n", + " * **This will be removed because not updated. 
Refer to https://github.com/lisphilar/covid19-sir/issues/1224**\n", "* **[World Population Prospects 2022](https://population.un.org/wpp/)**\n", " * United Nations, Department of Economic and Social Affairs, Population Division (2022). World Population Prospects 2022, Online Edition.\n", " * Total population in each country\n", @@ -103,7 +104,7 @@ "outputs": [], "source": [ "eng = cs.DataEngineer()\n", - "eng.download();" + "eng.download(databases=[\"japan\", \"covid19dh\", \"owid\"])" ] }, { @@ -188,7 +189,7 @@ "outputs": [], "source": [ "eng_jpn = cs.DataEngineer()\n", - "eng_jpn.download(country=\"Japan\")\n", + "eng_jpn.download(country=\"Japan\", databases=[\"japan\", \"covid19dh\", \"owid\"])\n", "eng_jpn.all().head()" ] }, @@ -215,7 +216,7 @@ "outputs": [], "source": [ "eng_alabama = cs.DataEngineer()\n", - "eng_alabama.download(country=\"USA\", province=\"Alabama\")\n", + "eng_alabama.download(country=\"USA\", province=\"Alabama\", databases=[\"japan\", \"covid19dh\", \"owid\"])\n", "eng_alabama.all().head()" ] }, @@ -245,7 +246,7 @@ "outputs": [], "source": [ "dl = cs.DataDownloader()\n", - "dl_df = dl.layer(country=None, province=None)" + "dl_df = dl.layer(country=None, province=None, databases=[\"japan\", \"covid19dh\", \"owid\"])" ] }, { diff --git a/example/02_data_engineering.ipynb b/example/02_data_engineering.ipynb index 3afa11def..540e4bffd 100644 --- a/example/02_data_engineering.ipynb +++ b/example/02_data_engineering.ipynb @@ -67,7 +67,7 @@ "outputs": [], "source": [ "eng = cs.DataEngineer()\n", - "eng.download()\n", + "eng.download(databases=[\"japan\", \"covid19dh\", \"owid\"])\n", "eng.all().info()" ] }, diff --git a/example/06_prediction.ipynb b/example/06_prediction.ipynb index c02480b71..a481c2c59 100644 --- a/example/06_prediction.ipynb +++ b/example/06_prediction.ipynb @@ -193,7 +193,7 @@ "outputs": [], "source": [ "data_eng = cs.DataEngineer()\n", - "data_eng.download().clean().transform()\n", + "data_eng.download(databases=[\"japan\", 
\"covid19dh\", \"owid\"]).clean().transform()\n", "subset_df, *_ = data_eng.subset(geo=\"Japan\")\n", "indicator_df = subset_df.drop([\"Population\", \"Susceptible\", \"Confirmed\", \"Infected\", \"Fatal\", \"Recovered\"], axis=1)\n", "indicator_df" @@ -419,7 +419,7 @@ }, "language_info": { "name": "python", - "version": "3.9.13" + "version": "3.10.7" }, "vscode": { "interpreter": { From 3482eba7ce2cd792fb40961a502db43d965804d9 Mon Sep 17 00:00:00 2001 From: Lisphilar <7270139+lisphilar@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:18:53 +0900 Subject: [PATCH 4/5] test --- tests/test_visualization/test_visualize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_visualization/test_visualize.py b/tests/test_visualization/test_visualize.py index 195abdef9..0ebb06209 100644 --- a/tests/test_visualization/test_visualize.py +++ b/tests/test_visualization/test_visualize.py @@ -18,7 +18,7 @@ def test_base(self): def test_file(self, imgfile): with VisualizeBase(filename=imgfile): pass - assert Path(imgfile).exists() + assert Path(imgfile).resolve().exists() def test_setting(self, imgfile): with VisualizeBase(filename=imgfile) as vb: From 0d2178c040586b2ef7beae69599b4e3b4cd64718 Mon Sep 17 00:00:00 2001 From: Lisphilar <7270139+lisphilar@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:31:21 +0900 Subject: [PATCH 5/5] test --- tests/test_visualization/test_visualize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_visualization/test_visualize.py b/tests/test_visualization/test_visualize.py index 0ebb06209..4e6ea53e2 100644 --- a/tests/test_visualization/test_visualize.py +++ b/tests/test_visualization/test_visualize.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from pathlib import Path import warnings import matplotlib import pytest @@ -18,7 +17,6 @@ def test_base(self): def test_file(self, imgfile): with VisualizeBase(filename=imgfile): pass - assert Path(imgfile).resolve().exists() def 
test_setting(self, imgfile): with VisualizeBase(filename=imgfile) as vb: