Skip to content

Commit c11273a

Browse files
authored
1404 Adjust epidata code to new nuts3 file structure (#1405)
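- Adapt `get_official_county_table` and its test to the new structure of the official county (NUTS3) Excel file: the header row is now read with `header=1` instead of `header=5`, and the unused "type" column no longer exists.
- Since not every county reports DIVI data, reduce the lower bound of the DIVI sanity check from 380 to 300 expected entries per day.
- Download the commuter data inside `test_get_neighbors_mobility` itself, since the test execution order can be arbitrary.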
1 parent 5d5091c commit c11273a

File tree

4 files changed

+21
-12
lines changed

4 files changed

+21
-12
lines changed

pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -341,20 +341,22 @@ def get_official_county_table():
341341
file = gd.download_file(url_counties, 1024, None,
342342
p.set_progress, verify=False)
343343
county_table = pd.read_excel(
344-
file, sheet_name=1, header=5, engine=gd.Conf.excel_engine)
344+
file, sheet_name=1, header=1, engine=gd.Conf.excel_engine)
345345
rename_kreise_deu_dict = {
346346
1: dd.EngEng['idCounty'],
347-
'2': "type", # name not important, column not used so far
348-
3: dd.EngEng['county'],
349-
4: dd.EngEng['nuts3'],
350-
5: dd.EngEng['area'],
351-
6: dd.EngEng['population'],
352-
7: "population_male", # name not important, column not used so far
353-
8: "population_female", # name not important, column not used so far
354-
9: "population_per_km2" # name not important, column not used so far
347+
2: dd.EngEng['county'],
348+
3: dd.EngEng['nuts3'],
349+
4: dd.EngEng['area'],
350+
5: dd.EngEng['population'],
351+
6: "population_male", # name not important, column not used so far
352+
7: "population_female", # name not important, column not used so far
353+
8: "population_per_km2" # name not important, column not used so far
355354
}
356355
# rename columns
357-
county_table.rename(columns=rename_kreise_deu_dict, inplace=True)
356+
county_table.columns = [
357+
rename_kreise_deu_dict.get(i + 1, old_name)
358+
for i, old_name in enumerate(county_table.columns)
359+
]
358360

359361
return county_table
360362

pycode/memilio-epidata/memilio/epidata/getDIVIData.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ def divi_data_sanity_checks(df: pd.DataFrame) -> None:
349349
# Maybe we should look for a new method to sanitize the size of the
350350
# DataFrame.
351351
num_dates = (date.today() - date(2020, 4, 24)).days
352-
min_num_data = 380 * num_dates # not all 400 counties report every day
352+
min_num_data = 300 * num_dates # not all 400 counties report every day
353353
max_num_data = 400 * num_dates
354354
if (len(df) < min_num_data) or (len(df) > max_num_data):
355355
raise gd.DataError("Error: unexpected length of dataframe.")

pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ class Test_geoModificationGermany(fake_filesystem_unittest.TestCase):
107107
'083', '084', '091', '092', '093', '094', '095', '096', '097', '100',
108108
'110', '120', '130', '145', '146', '147', '150', '160']
109109
county_table_test_headers = [
110-
'ID_County', 'type', 'County', 'NUTS3', 'Area', 'Population',
110+
'ID_County', 'County', 'NUTS3', 'Area', 'Population',
111111
'population_male', 'population_female', 'population_per_km2']
112112
test_list_regions1 = list(range(32))+[33]
113113
test_list_regions2 = [str(i).zfill(2) for i in range(32)]+['33']

pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,13 @@ def test_get_neighbors_mobility(self, mock_print, mock_input, mock_popul):
178178
179179
"""
180180

181+
gcm.get_commuter_data(
182+
out_folder=self.path,
183+
ref_year=2022,
184+
interactive=True,
185+
read_data=False
186+
)
187+
181188
testcountyid = 1051
182189
# direction = both
183190
(countykey_list, commuter_all) = gcm.get_neighbors_mobility(

0 commit comments

Comments
 (0)