Skip to content

Commit d33b623

Browse files
HenrZu and xsaschako authored
1148 Rework Data structure and Rename Mobility and Vaccination file (#1150)
- The name of the vaccination file was outdated in the epidata package; updated from all_county_ageinf_vacc to vacc_county_ageinf. - Remove default mobility matrix file. - Extend function getSimulationData to provide all files needed to execute the simulations. - Remove Twitter data. - Rework data structure so that the region is now the first level. - clean_data cleans all created data via the pycode. Co-authored-by: xsaschako <51127093+xsaschako@users.noreply.github.com>
1 parent a985af3 commit d33b623

File tree

72 files changed

+658
-1460
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+658
-1460
lines changed

cpp/examples/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,6 @@ add_executable(abm_history_example abm_history_object.cpp)
100100
target_link_libraries(abm_history_example PRIVATE memilio abm)
101101
target_compile_options(abm_history_example PRIVATE ${MEMILIO_CXX_FLAGS_ENABLE_WARNING_ERRORS})
102102

103-
add_executable(twitter_mobility_example twitter_mobility.cpp)
104-
target_link_libraries(twitter_mobility_example PRIVATE memilio ode_secir)
105-
target_compile_options(twitter_mobility_example PRIVATE ${MEMILIO_CXX_FLAGS_ENABLE_WARNING_ERRORS})
106-
107103
add_executable(ide_seir_example ide_seir.cpp)
108104
target_link_libraries(ide_seir_example PRIVATE memilio ide_seir)
109105
target_compile_options(ide_seir_example PRIVATE ${MEMILIO_CXX_FLAGS_ENABLE_WARNING_ERRORS})

cpp/examples/ode_secir_read_graph.cpp

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
std::string setup(int argc, char** argv, const std::string data_dir)
2929
{
3030
if (argc == 2) {
31-
std::cout << "Using file " << argv[1] << " in data/mobility." << std::endl;
32-
return mio::path_join(data_dir, "mobility", (std::string)argv[1]);
31+
std::cout << "Using file " << argv[1] << " in data/Germany/mobility." << std::endl;
32+
return mio::path_join(data_dir, "Germany", "mobility", (std::string)argv[1]);
3333
}
3434
else {
3535
if (argc > 2) {
@@ -38,13 +38,14 @@ std::string setup(int argc, char** argv, const std::string data_dir)
3838
else {
3939
mio::log_warning("No arguments given.");
4040
}
41-
std::cout << "Using default file twitter_scaled_1252 in data/mobility." << std::endl;
41+
auto mobility_file = "commuter_mobility_2022.txt";
42+
std::cout << "Using file " << mobility_file << " in data/Germany/mobility." << std::endl;
4243
std::cout << "Usage: read_graph MOBILITY_FILE"
4344
<< "\n\n";
44-
std::cout << "This example performs a simulation based on twitter "
45-
"mobility data."
46-
<< std::endl;
47-
return mio::path_join(data_dir, "mobility", "twitter_scaled_1252.txt");
45+
std::cout
46+
<< "This example performs a simulation based on mobility data from the German Federal Employment Agency."
47+
<< std::endl;
48+
return mio::path_join(data_dir, "Germany", "mobility", mobility_file);
4849
}
4950
}
5051

@@ -113,21 +114,22 @@ int main(int argc, char** argv)
113114
auto read_mobility_result = mio::read_mobility_plain(filename);
114115
if (!read_mobility_result) {
115116
std::cout << read_mobility_result.error().formatted_message() << '\n';
116-
return -1;
117+
std::cout << "Create the mobility file with MEmilio Epidata's getCommuterMobility.py file." << '\n';
118+
return 0;
117119
}
118-
auto& twitter_mobility_2018 = read_mobility_result.value();
120+
auto& commuter_mobility = read_mobility_result.value();
119121
std::cout << "Done" << std::endl;
120122

121123
std::cout << "Intializing Graph..." << std::flush;
122124
mio::Graph<mio::osecir::Model<FP>, mio::MobilityParameters<FP>> graph;
123-
for (int node = 0; node < twitter_mobility_2018.rows(); node++) {
125+
for (int node = 0; node < commuter_mobility.rows(); node++) {
124126
graph.add_node(node, model);
125127
}
126-
for (int row = 0; row < twitter_mobility_2018.rows(); row++) {
127-
for (int col = 0; col < twitter_mobility_2018.cols(); col++) {
128+
for (int row = 0; row < commuter_mobility.rows(); row++) {
129+
for (int col = 0; col < commuter_mobility.cols(); col++) {
128130
graph.add_edge(row, col,
129131
Eigen::VectorXd::Constant(10 * (size_t)nb_groups,
130-
twitter_mobility_2018(row, col) /
132+
commuter_mobility(row, col) /
131133
graph.nodes()[row].property.populations.get_total()));
132134
}
133135
}

cpp/examples/twitter_mobility.cpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

cpp/memilio/io/mobility_io.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
* See the License for the specific language governing permissions and
1818
* limitations under the License.
1919
*/
20-
#ifndef READ_TWITTER_H
21-
#define READ_TWITTER_H
20+
#ifndef MEMILIO_IO_MOBILITY_IO_H
21+
#define MEMILIO_IO_MOBILITY_IO_H
2222

2323
#include "memilio/io/json_serializer.h"
2424
#include "memilio/mobility/graph.h"
@@ -214,4 +214,4 @@ IOResult<void> save_edges(const std::vector<std::vector<TimeSeries<double>>>& en
214214

215215
} // namespace mio
216216

217-
#endif // READ_TWITTER_H
217+
#endif // MEMILIO_IO_MOBILITY_IO_H

cpp/memilio/mobility/graph.h

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -325,7 +325,7 @@ IOResult<void> set_nodes(const Parameters& params, Date start_date, Date end_dat
325325
/**
326326
* @brief Sets the graph edges.
327327
* Reads the commuting matrices from txt files and sets the graph edges with that.
328-
* @param[in] data_dir Directory that contains the data files.
328+
* @param[in] mobility_data_file File that contains the commuting matrix.
329329
* @param[in, out] params_graph Graph whose nodes are set by the function.
330330
* @param[in] mobile_compartments Compartments that commute.
331331
* @param[in] contact_locations_size Number of contact locations.
@@ -335,20 +335,15 @@ IOResult<void> set_nodes(const Parameters& params, Date start_date, Date end_dat
335335
*/
336336
template <class ContactLocation, class Model, class MobilityParams, class MobilityCoefficientGroup,
337337
class InfectionState, class ReadFunction>
338-
IOResult<void> set_edges(const fs::path& data_dir, Graph<Model, MobilityParams>& params_graph,
338+
IOResult<void> set_edges(const fs::path& mobility_data_file, Graph<Model, MobilityParams>& params_graph,
339339
std::initializer_list<InfectionState>& mobile_compartments, size_t contact_locations_size,
340340
ReadFunction&& read_func, std::vector<ScalarType> commuting_weights,
341341
std::vector<std::vector<size_t>> indices_of_saved_edges = {})
342342
{
343343
// mobility between nodes
344-
BOOST_OUTCOME_TRY(auto&& mobility_data_commuter,
345-
read_func((data_dir / "mobility" / "commuter_mobility.txt").string()));
346-
BOOST_OUTCOME_TRY(auto&& mobility_data_twitter,
347-
read_func((data_dir / "mobility" / "twitter_scaled_1252.txt").string()));
344+
BOOST_OUTCOME_TRY(auto&& mobility_data_commuter, read_func(mobility_data_file.string()));
348345
if (mobility_data_commuter.rows() != Eigen::Index(params_graph.nodes().size()) ||
349-
mobility_data_commuter.cols() != Eigen::Index(params_graph.nodes().size()) ||
350-
mobility_data_twitter.rows() != Eigen::Index(params_graph.nodes().size()) ||
351-
mobility_data_twitter.cols() != Eigen::Index(params_graph.nodes().size())) {
346+
mobility_data_commuter.cols() != Eigen::Index(params_graph.nodes().size())) {
352347
return mio::failure(mio::StatusCode::InvalidValue,
353348
"Mobility matrices do not have the correct size. You may need to run "
354349
"transformMobilitydata.py from pycode memilio epidata package.");
@@ -377,20 +372,9 @@ IOResult<void> set_edges(const fs::path& data_dir, Graph<Model, MobilityParams>&
377372
commuter_coeff_ij * commuting_weights[size_t(age)];
378373
}
379374
}
380-
//others
381-
auto total_population = populations.get_total();
382-
auto twitter_coeff = mobility_data_twitter(county_idx_i, county_idx_j) /
383-
total_population; //data is absolute numbers, we need relative
384-
for (auto age = AgeGroup(0); age < populations.template size<mio::AgeGroup>(); ++age) {
385-
for (auto compartment : mobile_compartments) {
386-
auto coeff_idx = populations.get_flat_index({age, compartment});
387-
mobility_coeffs[size_t(ContactLocation::Other)].get_baseline()[coeff_idx] = twitter_coeff;
388-
}
389-
}
390-
391375
//only add edges with mobility above thresholds for performance
392376
//thresholds are chosen empirically so that more than 99% of mobility is covered, approx. 1/3 of the edges
393-
if (commuter_coeff_ij > 4e-5 || twitter_coeff > 1e-5) {
377+
if (commuter_coeff_ij > 4e-5) {
394378
params_graph.add_edge(county_idx_i, county_idx_j, std::move(mobility_coeffs), indices_of_saved_edges);
395379
}
396380
}

cpp/models/ode_secir/parameters_io.h

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,10 @@ IOResult<void> export_input_data_county_timeseries(
335335
}
336336
#else
337337
template <class Model>
338-
IOResult<void>
339-
export_input_data_county_timeseries(std::vector<Model> models, const std::string& dir, std::vector<int> const& region,
340-
Date date, const std::vector<double>& scaling_factor_inf, double scaling_factor_icu,
341-
int num_days, const std::string& divi_data_path,
342-
const std::string& confirmed_cases_path, const std::string& population_data_path)
338+
IOResult<void> export_input_data_county_timeseries(
339+
std::vector<Model> models, const std::string& results_dir, std::vector<int> const& region, Date date,
340+
const std::vector<double>& scaling_factor_inf, double scaling_factor_icu, int num_days,
341+
const std::string& divi_data_path, const std::string& confirmed_cases_path, const std::string& population_data_path)
343342
{
344343
mio::log_warning("HDF5 not available. Cannot export time series of extrapolated real data.");
345344
return success();
@@ -352,18 +351,19 @@ export_input_data_county_timeseries(std::vector<Model> models, const std::string
352351
* @param[in] date Date for which the data should be read.
353352
* @param[in] scaling_factor_inf Factors by which to scale the confirmed cases of rki data.
354353
* @param[in] scaling_factor_icu Factor by which to scale the icu cases of divi data.
355-
* @param[in] dir Directory of files.
354+
* @param[in] pydata_dir Directory of files.
356355
*/
357356
template <class Model>
358357
IOResult<void> read_input_data_germany(std::vector<Model>& model, Date date,
359358
const std::vector<double>& scaling_factor_inf, double scaling_factor_icu,
360-
const std::string& dir)
359+
const std::string& pydata_dir)
361360
{
362361
BOOST_OUTCOME_TRY(
363-
details::set_divi_data(model, path_join(dir, "germany_divi.json"), {0}, date, scaling_factor_icu));
364-
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(dir, "cases_all_age_ma7.json"), {0}, date,
365-
scaling_factor_inf));
366-
BOOST_OUTCOME_TRY(details::set_population_data(model, path_join(dir, "county_current_population.json"), {0}));
362+
details::set_divi_data(model, path_join(pydata_dir, "germany_divi.json"), {0}, date, scaling_factor_icu));
363+
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(pydata_dir, "cases_all_age_ma7.json"), {0},
364+
date, scaling_factor_inf));
365+
BOOST_OUTCOME_TRY(
366+
details::set_population_data(model, path_join(pydata_dir, "county_current_population.json"), {0}));
367367
return success();
368368
}
369369

@@ -374,19 +374,20 @@ IOResult<void> read_input_data_germany(std::vector<Model>& model, Date date,
374374
* @param[in] state Vector of region keys of states of interest.
375375
* @param[in] scaling_factor_inf Factors by which to scale the confirmed cases of rki data.
376376
* @param[in] scaling_factor_icu Factor by which to scale the icu cases of divi data.
377-
* @param[in] dir Directory of files.
377+
* @param[in] pydata_dir Directory of files.
378378
*/
379379
template <class Model>
380380
IOResult<void> read_input_data_state(std::vector<Model>& model, Date date, std::vector<int>& state,
381381
const std::vector<double>& scaling_factor_inf, double scaling_factor_icu,
382-
const std::string& dir)
382+
const std::string& pydata_dir)
383383
{
384384

385385
BOOST_OUTCOME_TRY(
386-
details::set_divi_data(model, path_join(dir, "state_divi.json"), state, date, scaling_factor_icu));
387-
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(dir, "cases_all_state_age_ma7.json"), state,
388-
date, scaling_factor_inf));
389-
BOOST_OUTCOME_TRY(details::set_population_data(model, path_join(dir, "county_current_population.json"), state));
386+
details::set_divi_data(model, path_join(pydata_dir, "state_divi.json"), state, date, scaling_factor_icu));
387+
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(pydata_dir, "cases_all_state_age_ma7.json"),
388+
state, date, scaling_factor_inf));
389+
BOOST_OUTCOME_TRY(
390+
details::set_population_data(model, path_join(pydata_dir, "county_current_population.json"), state));
390391
return success();
391392
}
392393

@@ -397,33 +398,32 @@ IOResult<void> read_input_data_state(std::vector<Model>& model, Date date, std::
397398
* @param[in] county Vector of region keys of counties of interest.
398399
* @param[in] scaling_factor_inf Factors by which to scale the confirmed cases of rki data.
399400
* @param[in] scaling_factor_icu Factor by which to scale the icu cases of divi data.
400-
* @param[in] dir Directory of files.
401+
* @param[in] pydata_dir Directory of files.
401402
* @param[in] num_days [Default: 0] Number of days to be simulated; required to extrapolate real data.
402403
* @param[in] export_time_series [Default: false] If true, reads data for each day of simulation and writes it in the same directory as the input files.
403404
*/
404405
template <class Model>
405406
IOResult<void> read_input_data_county(std::vector<Model>& model, Date date, const std::vector<int>& county,
406407
const std::vector<double>& scaling_factor_inf, double scaling_factor_icu,
407-
const std::string& dir, int num_days = 0, bool export_time_series = false)
408+
const std::string& pydata_dir, int num_days = 0, bool export_time_series = false)
408409
{
409-
BOOST_OUTCOME_TRY(details::set_divi_data(model, path_join(dir, "pydata/Germany", "county_divi_ma7.json"), county,
410-
date, scaling_factor_icu));
411-
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(
412-
model, path_join(dir, "pydata/Germany", "cases_all_county_age_ma7.json"), county, date, scaling_factor_inf));
413-
BOOST_OUTCOME_TRY(details::set_population_data(
414-
model, path_join(dir, "pydata/Germany", "county_current_population.json"), county));
410+
BOOST_OUTCOME_TRY(
411+
details::set_divi_data(model, path_join(pydata_dir, "county_divi_ma7.json"), county, date, scaling_factor_icu));
412+
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(pydata_dir, "cases_all_county_age_ma7.json"),
413+
county, date, scaling_factor_inf));
414+
BOOST_OUTCOME_TRY(
415+
details::set_population_data(model, path_join(pydata_dir, "county_current_population.json"), county));
415416

416417
if (export_time_series) {
417418
// Use only if extrapolated real data is needed for comparison. EXPENSIVE !
418419
// Run time equals run time of the previous functions times the num_days !
419420
// (This only represents the vectorization of the previous function over all simulation days...)
420421
log_warning("Exporting time series of extrapolated real data. This may take some minutes. "
421422
"For simulation runs over the same time period, deactivate it.");
422-
BOOST_OUTCOME_TRY(
423-
export_input_data_county_timeseries(model, dir, county, date, scaling_factor_inf, scaling_factor_icu,
424-
num_days, path_join(dir, "pydata/Germany", "county_divi_ma7.json"),
425-
path_join(dir, "pydata/Germany", "cases_all_county_age_ma7.json"),
426-
path_join(dir, "pydata/Germany", "county_current_population.json")));
423+
BOOST_OUTCOME_TRY(export_input_data_county_timeseries(
424+
model, pydata_dir, county, date, scaling_factor_inf, scaling_factor_icu, num_days,
425+
path_join(pydata_dir, "county_divi_ma7.json"), path_join(pydata_dir, "cases_all_county_age_ma7.json"),
426+
path_join(pydata_dir, "county_current_population.json")));
427427
}
428428
return success();
429429
}
@@ -435,19 +435,19 @@ IOResult<void> read_input_data_county(std::vector<Model>& model, Date date, cons
435435
* @param[in] county vector of region keys of interest
436436
* @param[in] scaling_factor_inf factors by which to scale the confirmed cases of rki data
437437
* @param[in] scaling_factor_icu factor by which to scale the icu cases of divi data
438-
* @param[in] dir directory of files
438+
* @param[in] pydata_dir directory of files
439439
* @param[in] age_group_names strings specifying age group names
440440
*/
441441
template <class Model>
442442
IOResult<void> read_input_data(std::vector<Model>& model, Date date, const std::vector<int>& node_ids,
443443
const std::vector<double>& scaling_factor_inf, double scaling_factor_icu,
444-
const std::string& data_dir, int num_days = 0, bool export_time_series = false)
444+
const std::string& pydata_dir, int num_days = 0, bool export_time_series = false)
445445
{
446-
BOOST_OUTCOME_TRY(
447-
details::set_divi_data(model, path_join(data_dir, "critical_cases.json"), node_ids, date, scaling_factor_icu));
448-
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(data_dir, "confirmed_cases.json"), node_ids,
446+
BOOST_OUTCOME_TRY(details::set_divi_data(model, path_join(pydata_dir, "critical_cases.json"), node_ids, date,
447+
scaling_factor_icu));
448+
BOOST_OUTCOME_TRY(details::set_confirmed_cases_data(model, path_join(pydata_dir, "confirmed_cases.json"), node_ids,
449449
date, scaling_factor_inf));
450-
BOOST_OUTCOME_TRY(details::set_population_data(model, path_join(data_dir, "population_data.json"), node_ids));
450+
BOOST_OUTCOME_TRY(details::set_population_data(model, path_join(pydata_dir, "population_data.json"), node_ids));
451451

452452
if (export_time_series) {
453453
// Use only if extrapolated real data is needed for comparison. EXPENSIVE !
@@ -456,9 +456,9 @@ IOResult<void> read_input_data(std::vector<Model>& model, Date date, const std::
456456
log_warning("Exporting time series of extrapolated real data. This may take some minutes. "
457457
"For simulation runs over the same time period, deactivate it.");
458458
BOOST_OUTCOME_TRY(export_input_data_county_timeseries(
459-
model, data_dir, node_ids, date, scaling_factor_inf, scaling_factor_icu, num_days,
460-
path_join(data_dir, "critical_cases.json"), path_join(data_dir, "confirmed_cases.json"),
461-
path_join(data_dir, "population_data.json")));
459+
model, pydata_dir, node_ids, date, scaling_factor_inf, scaling_factor_icu, num_days,
460+
path_join(pydata_dir, "critical_cases.json"), path_join(pydata_dir, "confirmed_cases.json"),
461+
path_join(pydata_dir, "population_data.json")));
462462
}
463463
return success();
464464
}

0 commit comments

Comments
 (0)