# Switch on the pandas "mode.copy_on_write" option for the whole module.
pd.set_option('mode.copy_on_write', True)
4040
4141
def read_population_data(ref_year=None):
    """! Reads Population data from regionalstatistik.de

    A request is made to regionalstatistik.de and the response text is read
    in as a semicolon-separated csv into the dataframe format.
    If a reference year is given, the data of that year is requested first.
    If that response cannot be parsed, a warning is printed and the newest
    data is downloaded instead.

    @param ref_year [Default: None] or year (yyyy) convertible to str.
        Reference year.
    @return Tuple (DataFrame, ref_year). The returned ref_year is None if the
        newest data was downloaded, either because no year was requested or
        because the requested year could not be read.
    """
    base_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'

    def download(url):
        # Single place for request + parsing so that the year-specific and
        # the fallback download cannot drift apart.
        req = requests.get(url)
        # header=6: the column names are taken from the seventh line.
        return pd.read_csv(io.StringIO(req.text), sep=';', header=6)

    if ref_year is not None:
        try:
            return download(base_url + '&zeiten=' + str(ref_year)), ref_year
        except (pd.errors.ParserError, pd.errors.EmptyDataError):
            # An empty response raises EmptyDataError, which is not a
            # ParserError; treat it as "year not available" as well.
            gd.default_print('Warning', 'Data for year ' + str(ref_year) +
                             ' is not available; downloading newest data instead.')

    return download(base_url), None
65+
66+
67+ def export_population_dataframe (df_pop : pd .DataFrame , directory : str , file_format : str , merge_eisenach : bool , ref_year ):
5768 """! Writes population dataframe into directory with new column names and age groups
5869
5970 @param df_pop Population data DataFrame to be exported
@@ -62,6 +73,7 @@ def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_forma
6273 @param merge_eisenach Defines whether the counties 'Wartburgkreis'
6374 and 'Eisenach' are listed separately or
6475 combined as one entity 'Wartburgkreis'.
76+ @param ref_year None or year (yyyy) convertible to str. Reference year.
6577 @return exported DataFrame
6678 """
6779
@@ -111,19 +123,20 @@ def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_forma
111123
112124 gd .check_dir (directory )
113125
114- if len (df_pop_export ) == 401 :
115- filename = 'county_current_population_dim401'
116- gd .write_dataframe (df_pop_export , directory , filename , file_format )
117-
118- if len (df_pop_export ) == 400 or merge_eisenach :
126+ if ref_year is None :
119127 filename = 'county_current_population'
128+ else :
129+ filename = 'county_' + str (ref_year ) + '_population'
130+
131+ if len (df_pop_export ) == 401 :
132+ filename = filename + '_dim401'
120133
121- # Merge Eisenach and Wartburgkreis
122- df_pop_export = geoger .merge_df_counties_all (
123- df_pop_export , sorting = [dd .EngEng ["idCounty" ]],
124- columns = dd .EngEng ["idCounty" ])
134+ # Merge Eisenach and Wartburgkreis
135+ df_pop_export = geoger .merge_df_counties_all (
136+ df_pop_export , sorting = [dd .EngEng ["idCounty" ]],
137+ columns = dd .EngEng ["idCounty" ])
125138
126- gd .write_dataframe (df_pop_export , directory , filename , file_format )
139+ gd .write_dataframe (df_pop_export , directory , filename , file_format )
127140
128141 return df_pop_export
129142
def test_total_population(df_pop, age_cols):
    """! Plausibility check of the total population

    Sums all entries of the given age group columns. An error is raised if
    the sum is not an integer. A warning is printed if the sum deviates by
    more than 5% from the expected total of 84 million.

    @param df_pop Population Dataframe with all counties
    @param age_cols All age groups in DataFrame
    """
    expected_total = 84e6
    lower_bound = 0.95 * expected_total
    upper_bound = 1.05 * expected_total

    population = df_pop[age_cols].sum().sum()

    # a non-integer sum indicates unexpected column dtypes
    if not isinstance(population, (int, np.integer)):
        raise gd.DataError('Unexpected dtypes in Population Data.')

    # only warn (do not fail) if the total leaves the +-5% band
    if not (lower_bound <= population <= upper_bound):
        gd.default_print(
            'Warning', 'Total Population does not match expectation.')
219228
220229
221230def fetch_population_data (read_data : bool = dd .defaultDict ['read_data' ],
222231 out_folder : str = dd .defaultDict ['out_folder' ],
232+ ref_year = None ,
223233 ** kwargs
224234 ) -> pd .DataFrame :
225235 """! Downloads or reads the population data.
@@ -232,6 +242,7 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
232242 downloaded. Default defined in defaultDict.
233243 @param out_folder Path to folder where data is written in folder
234244 out_folder/Germany. Default defined in defaultDict.
245+ @param ref_year [Default: None] or year (yyyy) convertible to str. Reference year.
235246 @return DataFrame with adjusted population data for all ages to current level.
236247 """
237248 conf = gd .Conf (out_folder , ** kwargs )
@@ -245,9 +256,9 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
245256 directory = os .path .join (out_folder , 'Germany' )
246257 gd .check_dir (directory )
247258
248- df_pop_raw = read_population_data ()
259+ df_pop_raw , ref_year = read_population_data (ref_year )
249260
250- return df_pop_raw
261+ return df_pop_raw , ref_year
251262
252263
253264def preprocess_population_data (df_pop_raw : pd .DataFrame ,
@@ -310,7 +321,8 @@ def preprocess_population_data(df_pop_raw: pd.DataFrame,
def write_population_data(df_pop: pd.DataFrame,
                          out_folder: str = dd.defaultDict['out_folder'],
                          file_format: str = dd.defaultDict['file_format'],
                          merge_eisenach: bool = True,
                          ref_year=None
                          ) -> pd.DataFrame:
    """! Write the population data into the folder out_folder/Germany

    The export itself is delegated to export_population_dataframe, which
    receives the target directory together with the remaining arguments.

    @param df_pop Population data DataFrame to be exported.
    @param out_folder Path to folder where data is written in folder
        out_folder/Germany. Default defined in defaultDict.
    @param file_format File format which is used for writing the data.
        Default defined in defaultDict.
    @param merge_eisenach [Default: True] or False. Defines whether the
        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
        combined as one entity 'Wartburgkreis'.
    @param ref_year [Default: None] or year (yyyy) convertible to str.
        Reference year.

    @return DataFrame returned by export_population_dataframe.
    """
    directory = os.path.join(out_folder, 'Germany')
    return export_population_dataframe(
        df_pop, directory, file_format, merge_eisenach, ref_year)
334347
335348
336349def get_population_data (read_data : bool = dd .defaultDict ['read_data' ],
337350 file_format : str = dd .defaultDict ['file_format' ],
338351 out_folder : str = dd .defaultDict ['out_folder' ],
339352 merge_eisenach : bool = True ,
353+ ref_year = None ,
340354 ** kwargs
341355 ):
342356 """! Download age-stratified population data for the German counties.
@@ -369,14 +383,16 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
369383 @param merge_eisenach [Default: True] or False. Defines whether the
370384 counties 'Wartburgkreis' and 'Eisenach' are listed separately or
371385 combined as one entity 'Wartburgkreis'.
386+ @param ref_year [Default: None] or year (yyyy) convertible to str. Reference year.
372387 @param username str. Username to sign in at regionalstatistik.de.
373388 @param password str. Password to sign in at regionalstatistik.de.
374389 @return DataFrame with adjusted population data for all ages to current level.
375390 """
376- raw_df = fetch_population_data (
391+ raw_df , ref_year = fetch_population_data (
377392 read_data = read_data ,
378393 out_folder = out_folder ,
379394 file_format = file_format ,
395+ ref_year = ref_year ,
380396 ** kwargs
381397 )
382398 preprocess_df = preprocess_population_data (
@@ -387,7 +403,8 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
387403 df_pop = preprocess_df ,
388404 file_format = file_format ,
389405 out_folder = out_folder ,
390- merge_eisenach = True
406+ merge_eisenach = True ,
407+ ref_year = ref_year
391408 )
392409 return df_pop_export
393410
0 commit comments