Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/epidata_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ jobs:
run: |
mkdir -p data_dl
getcasedata -o data_dl --no-progress-indicators
getpopuldata -o data_dl --no-progress-indicators --username=${{ secrets.REGIODBUSER }} --password=${{ secrets.REGIODBPW }}
getpopuldata -o data_dl --no-progress-indicators
getjhdata -o data_dl --no-progress-indicators
getdividata -o data_dl --no-progress-indicators
getcommutermobility -o data_dl --no-progress-indicators
Expand Down
4 changes: 0 additions & 4 deletions pycode/memilio-epidata/memilio/epidata/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,6 @@ optional arguments working for some are:
| --sanitize-data | Different ways to distribute vaccinations to home |
| | locations of vaccinated persons[vaccination] |
+---------------------------------------------+-----------------------------------------------------------+
| --username | Username for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+
| --password | Password for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+
| --files | Files to write [case] |
+---------------------------------------------+-----------------------------------------------------------+

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,6 @@ def cli(what):
- verbose
- skip_checks
- no_raw
- username
- password
- to_dataset

@param what Defines what packages calls and thus what kind of command line arguments should be defined.
Expand All @@ -379,7 +377,7 @@ def cli(what):

cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
"cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
"population": ['Download population data from official sources', 'username'],
"population": ['Download population data from official sources'],
"commuter_official": ['Download commuter data from official sources'],
"vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'],
"testing": ['Download testing data', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
Expand Down Expand Up @@ -498,14 +496,6 @@ def cli(what):
'--skip-checks', dest='run_checks', action='store_false',
help='Skips sanity checks etc.')

if 'username' in what_list:
parser.add_argument(
'--username', type=str
)

parser.add_argument(
'--password', type=str
)
if '--to-dataset' in sys.argv:
parser.add_argument(
'--to-dataset', dest='to_dataset',
Expand Down
84 changes: 3 additions & 81 deletions pycode/memilio-epidata/memilio/epidata/getPopulationData.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
@brief Downloads data about population statistic

"""
import configparser
import warnings
import getpass
import requests
import os
import io
Expand All @@ -41,82 +39,19 @@
pd.options.mode.copy_on_write = True


def read_population_data(username, password):
def read_population_data():
"""! Reads Population data from regionalstatistik.de

Username and Password are required to sign in on regionalstatistik.de.
A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format.

@param username Username to sign in at regionalstatistik.de.
@param password Password to sign in at regionalstatistik.de.
@return DataFrame
"""

download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'
req = requests.get(download_url, auth=(username, password))
req = requests.get(download_url)
df_pop_raw = pd.read_csv(io.StringIO(req.text), sep=';', header=6)

return df_pop_raw

# This function is needed for unittests
# Fakefilesystem has problems with os.path


def path_to_credential_file():
    """! Returns the path to the .ini file in which the credentials are stored.

    The file is named 'CredentialsRegio.ini' and is looked up in the directory
    that contains this module. The location can be changed here if necessary.

    @return Absolute path to the credentials file.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(module_dir, 'CredentialsRegio.ini')


def manage_credentials(interactive):
    """! Manages credentials for regionalstatistik.de (needed for download).

    A config file inside the epidata folder is either written (if not existent yet)
    with input from user or read with following format:
    [CREDENTIALS]
    Username = XXXXX
    Password = XXXXX

    @param interactive If True and no .ini file exists, username and password
        are requested on the console and optionally stored in the .ini file.
        If False and no .ini file exists, an error is raised.
    @return Username and password to sign in at regionalstatistik.de.
    @throws gd.DataError if no .ini file exists in non-interactive mode or if
        the .ini file does not contain the expected section/keys.
    """
    # path where ini file is found
    path = path_to_credential_file()

    gd.default_print(
        'Info', 'No passwaord and/or username for regionalstatistik.de provided. Try to read from .ini file.')

    # .ini file exists: read the stored credentials
    if os.path.exists(path):
        # Interpolation is disabled so that a '%' inside the stored password is
        # returned literally instead of raising an InterpolationSyntaxError.
        parser = configparser.ConfigParser(interpolation=None)
        parser.read(path)
        try:
            credentials = parser['CREDENTIALS']
            return credentials['Username'], credentials['Password']
        except KeyError as err:
            raise gd.DataError(
                'Credentials file ' + str(path) +
                ' does not contain a [CREDENTIALS] section with Username and Password.') from err

    # no .ini file and nobody to ask: downloading is impossible
    if not interactive:
        raise gd.DataError(
            'No .ini file found. Cannot access regionalstatistik.de for downloading population data.')

    # no .ini file, but the user can be asked for the credentials
    gd.default_print(
        'Info', '.ini file not found. Writing CredentialsRegio.ini...')
    username = input(
        "Please enter username for https://www.regionalstatistik.de/genesis/online\n")
    password = getpass.getpass(
        "Please enter password for https://www.regionalstatistik.de/genesis/online\n")
    # create file only with explicit consent, since it is stored unencrypted
    write_ini = gd.user_choice(
        message='Do you want the credentials to be stored in an unencrypted .ini file?\n' +
        'The next time this function is called, the credentials can be read from that file.')
    if write_ini:
        string = '[CREDENTIALS]\nUsername = ' + \
            username+'\nPassword = '+password
        with open(path, 'w+') as file:
            file.write(string)

    return username, password


def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_format: str, merge_eisenach: bool):
"""! Writes population dataframe into directory with new column names and age groups
Expand Down Expand Up @@ -285,8 +220,6 @@ def test_total_population(df_pop, age_cols):

def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
out_folder: str = dd.defaultDict['out_folder'],
username='',
password='',
**kwargs
) -> pd.DataFrame:
"""! Downloads or reads the population data.
Expand All @@ -299,9 +232,6 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
downloaded. Default defined in defaultDict.
@param out_folder Path to folder where data is written in folder
out_folder/Germany. Default defined in defaultDict.
@param username Username to sign in at regionalstatistik.de.
@param password Password to sign in at regionalstatistik.de.

@return DataFrame with adjusted population data for all ages to current level.
"""
conf = gd.Conf(out_folder, **kwargs)
Expand All @@ -312,14 +242,10 @@ def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
'Warning', 'Read_data is not supportet for getPopulationData.py. Setting read_data = False')
read_data = False

# If no username or password is provided, the credentials are either read from an .ini file or,
# if the file does not exist they have to be given as user input.
if (username is None) or (password is None):
username, password = manage_credentials(conf.interactive)
directory = os.path.join(out_folder, 'Germany')
gd.check_dir(directory)

df_pop_raw = read_population_data(username, password)
df_pop_raw = read_population_data()

return df_pop_raw

Expand Down Expand Up @@ -411,8 +337,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
file_format: str = dd.defaultDict['file_format'],
out_folder: str = dd.defaultDict['out_folder'],
merge_eisenach: bool = True,
username='',
password='',
**kwargs
):
"""! Download age-stratified population data for the German counties.
Expand Down Expand Up @@ -453,8 +377,6 @@ def get_population_data(read_data: bool = dd.defaultDict['read_data'],
read_data=read_data,
out_folder=out_folder,
file_format=file_format,
username=username,
password=password,
**kwargs
)
preprocess_df = preprocess_population_data(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def test_call_functions(
# change start-date of jh to 2020-01-22
arg_dict_jh["start_date"] = date(2020, 1, 22)

arg_dict_popul = {**arg_dict_all, "username": None, "password": None}
arg_dict_popul = {**arg_dict_all}

getVaccinationData.main()
mock_vaccination.assert_called()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ class Test_getPopulationData(fake_filesystem_unittest.TestCase):

path = '/home/Population_Data'

config_file_name = 'CredentialsRegio.ini'
test_username = 'username_test'
test_password = 'password_test'

here = os.path.dirname(os.path.abspath(__file__))
filename = os.path.join(
here, 'test_data', 'TestSetPopulationExport.json')
Expand Down Expand Up @@ -71,53 +67,10 @@ def test_export_population_data(self):
return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_get_population_data_full(self, mock_test, mock_export, mock_download):
def test_get_population_data_full(self, mock_test, mock_assign, mock_download):
# should not raise any errors
gpd.get_population_data(out_folder=self.path)

@patch('builtins.input', return_value=test_username)
@patch('getpass.getpass', return_value=test_password)
@patch('memilio.epidata.getDataIntoPandasDataFrame.user_choice', return_value=True)
@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_config_write(self, mock_test, mock_export, mock_raw, mock_path, mock_choice, mock_pw, mock_un):
    """Credentials entered interactively are written to the config file.

    Console input, the password prompt and the user's consent to store the
    credentials are patched; the download and the assignment of the
    population data to counties are mocked as well.
    """
    gpd.get_population_data(username=None, password=None, interactive=True)

    # The config file has to show up in the current working directory.
    working_dir = os.getcwd()
    self.assertIn(self.config_file_name, os.listdir(working_dir))

    # Read the file back and compare its content with the patched input.
    config = configparser.ConfigParser()
    config.read(os.path.join(working_dir, self.config_file_name))
    stored = config['CREDENTIALS']
    self.assertEqual(stored['Username'], self.test_username)
    self.assertEqual(stored['Password'], self.test_password)

@patch('memilio.epidata.getPopulationData.path_to_credential_file', return_value='./CredentialsRegio.ini')
@patch('memilio.epidata.getPopulationData.read_population_data', return_value=df_pop_raw)
@patch('memilio.epidata.getPopulationData.assign_population_data', return_value=df_pop)
@patch('memilio.epidata.getPopulationData.test_total_population')
def test_config_read(self, mock_test, mock_export, mock_read, mock_path):
    """Credentials stored in an existing config file are used for the download.

    The config file is created by the test itself; the download and the
    assignment of the population data to counties are mocked.
    """
    working_dir = os.getcwd()
    ini_path = os.path.join(working_dir, self.config_file_name)

    # Precondition: no config file is present before the test creates one.
    self.assertNotIn(self.config_file_name, os.listdir(working_dir))

    # Create the config file in the format expected by getPopulationData.
    content = '\n'.join([
        '[CREDENTIALS]',
        'Username = ' + self.test_username,
        'Password = ' + self.test_password,
    ])
    with open(ini_path, 'w+') as ini_file:
        ini_file.write(content)
    self.assertIn(self.config_file_name, os.listdir(working_dir))

    # Without username/password the credentials have to come from the file.
    gpd.get_population_data(
        username=None, password=None, read_data=False,
        out_folder=self.path, interactive=False)

    # The mocked download must have received exactly the stored credentials.
    mock_read.assert_called_with(
        self.test_username, self.test_password)


if __name__ == '__main__':
unittest.main()