1404 Adjust epidata code to new nuts3 file structure (#1405)

HenrZu · web-flow · commit c11273aa898f · 2025-10-29T10:45:21.000+01:00
- Adapt geoModificationGermany.py (get_official_county_table) and its test to the new structure of the official county table file
- Since not every county reports DIVI data, lower the minimum expected number of data rows in the sanity check (from 380 to 300 per day)
- Add a download of the commuter data to the get_neighbors_mobility test, since the order can be arbitrary
diff --git a/pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py b/pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py
@@ -341,20 +341,22 @@ def get_official_county_table():
         file = gd.download_file(url_counties, 1024, None,
                                 p.set_progress, verify=False)
     county_table = pd.read_excel(
-        file, sheet_name=1, header=5, engine=gd.Conf.excel_engine)
+        file, sheet_name=1, header=1, engine=gd.Conf.excel_engine)
     rename_kreise_deu_dict = {
         1: dd.EngEng['idCounty'],
-        '2': "type",  # name not important, column not used so far
-        3: dd.EngEng['county'],
-        4: dd.EngEng['nuts3'],
-        5: dd.EngEng['area'],
-        6: dd.EngEng['population'],
-        7: "population_male",  # name not important, column not used so far
-        8: "population_female",  # name not important, column not used so far
-        9: "population_per_km2"  # name not important, column not used so far
+        2: dd.EngEng['county'],
+        3: dd.EngEng['nuts3'],
+        4: dd.EngEng['area'],
+        5: dd.EngEng['population'],
+        6: "population_male",  # name not important, column not used so far
+        7: "population_female",  # name not important, column not used so far
+        8: "population_per_km2"  # name not important, column not used so far
     }
     # rename columns
-    county_table.rename(columns=rename_kreise_deu_dict, inplace=True)
+    county_table.columns = [
+        rename_kreise_deu_dict.get(i + 1, old_name)
+        for i, old_name in enumerate(county_table.columns)
+    ]
 
     return county_table
 
diff --git a/pycode/memilio-epidata/memilio/epidata/getDIVIData.py b/pycode/memilio-epidata/memilio/epidata/getDIVIData.py
@@ -349,7 +349,7 @@ def divi_data_sanity_checks(df: pd.DataFrame) -> None:
     # Maybe we should look for a new method to sanitize the size of the
     # DataFrame.
     num_dates = (date.today() - date(2020, 4, 24)).days
-    min_num_data = 380 * num_dates  # not all 400 counties report every day
+    min_num_data = 300 * num_dates  # not all 400 counties report every day
     max_num_data = 400 * num_dates
     if (len(df) < min_num_data) or (len(df) > max_num_data):
         raise gd.DataError("Error: unexpected length of dataframe.")
diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_geoModificationGermany.py
@@ -107,7 +107,7 @@ class Test_geoModificationGermany(fake_filesystem_unittest.TestCase):
         '083', '084', '091', '092', '093', '094', '095', '096', '097', '100',
         '110', '120', '130', '145', '146', '147', '150', '160']
     county_table_test_headers = [
-        'ID_County', 'type', 'County', 'NUTS3', 'Area', 'Population',
+        'ID_County', 'County', 'NUTS3', 'Area', 'Population',
         'population_male', 'population_female', 'population_per_km2']
     test_list_regions1 = list(range(32))+[33]
     test_list_regions2 = [str(i).zfill(2) for i in range(32)]+['33']
diff --git a/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py b/pycode/memilio-epidata/memilio/epidata_test/test_epidata_getCommuterMobility.py
@@ -178,6 +178,13 @@ def test_get_neighbors_mobility(self, mock_print, mock_input, mock_popul):
 
         """
 
+        gcm.get_commuter_data(
+            out_folder=self.path,
+            ref_year=2022,
+            interactive=True,
+            read_data=False
+        )
+
         testcountyid = 1051
         # direction = both
         (countykey_list, commuter_all) = gcm.get_neighbors_mobility(