Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue196 #197

Merged
merged 8 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions covsirphy/cleaning/jhu_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,17 @@ def subset(self, country, province=None,
Records with Recovered > 0 will be selected.
"""
# Subset with area and start/end date
df = super().subset(
subset_df = super().subset(
country=country, province=province, start_date=start_date, end_date=end_date)
# Select records where Recovered > 0
df = df.loc[df[self.R] > 0, :]
df = subset_df.loc[subset_df[self.R] > 0, :]
if df.empty:
series = subset_df[self.DATE]
start_date = start_date or series.min().strftime(self.DATE_FORMAT)
end_date = end_date or series.max().strftime(self.DATE_FORMAT)
s1 = "Records with Recovered > 0 are not registered."
s2 = f"(country={country}, province={province}, period={start_date}-{end_date})"
raise ValueError(
f"{s1} {s2}"
)
raise ValueError(f"{s1} {s2}")
# Calculate Susceptible if population value was applied
if population is None:
return df
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/mbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class ModelBase(Term):
# Variable names in (non-dim, dimensional) ODEs
VAR_DICT = dict()
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array(list())
# Weights of variables in parameter estimation error function
WEIGHTS = np.array(list())
# Variables that increases monotonically
VARS_INCLEASE = list()
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sewirf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class SEWIRF(ModelBase):
"x3": ModelBase.W,
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([0, 10, 10, 2, 0, 0])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([0, 10, 10, 2, 0, 0])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sir.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class SIR(ModelBase):
"z": ModelBase.FR
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 1, 1])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 1, 1])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.FR]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sird.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class SIRD(ModelBase):
"w": ModelBase.F
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 10, 10, 2])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 10, 10, 2])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sirf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class SIRF(ModelBase):
"w": ModelBase.F
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 10, 10, 2])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 1, 1, 1])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sirfv.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class SIRFV(ModelBase):
"v": ModelBase.V
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([0, 10, 10, 2, 0])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([0, 10, 10, 2, 0])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
57 changes: 37 additions & 20 deletions covsirphy/simulation/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
# Arguments
self.population = self.ensure_population(population)
self.model = self.ensure_subclass(model, ModelBase, name="model")
self.tau = self.ensure_tau(tau)
# Dataset
if isinstance(record_df, JHUData):
subset_arg_dict = find_args(
Expand All @@ -66,6 +65,9 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
optuna.logging.disable_default_handler()
self.x = self.TS
self.y_list = model.VARIABLES[:]
self.weight_dict = {
v: p for (v, p) in zip(model.VARIABLES, model.WEIGHTS) if p > 0
}
self.study = None
self.total_trials = 0
self.run_time = 0
Expand All @@ -74,6 +76,9 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
self.train_df = None
# step_n will be defined in divide_minutes()
self.step_n = None
# tau value
self.tau = self.ensure_tau(tau)
self.taufree_df = pd.DataFrame() if tau is None else self.divide_minutes(tau)

def run(self, timeout=60, reset_n_max=3,
timeout_iteration=5, allowance=(0.98, 1.02), seed=0, **kwargs):
Expand Down Expand Up @@ -140,10 +145,8 @@ def _is_in_allowance(self, comp_df, allowance):
Returns:
(bool): True when all max values of predicted values are in allowance
"""
df = self.ensure_dataframe(comp_df, name="comp_df")
variables = self.model.VARIABLES[:]
a_max_values = [df[f"{v}{self.A}"].max() for v in variables]
p_max_values = [df[f"{v}{self.P}"].max() for v in variables]
a_max_values = [comp_df[f"{v}{self.A}"].max() for v in self.y_list]
p_max_values = [comp_df[f"{v}{self.P}"].max() for v in self.y_list]
allowance0, allowance1 = allowance
ok_list = [
(a * allowance0 <= p) and (p <= a * allowance1)
Expand All @@ -163,10 +166,10 @@ def objective(self, trial):
(float): score of the error function to minimize
"""
# Convert T to t using tau
tau = self.tau
if tau is None:
taufree_df = self.taufree_df.copy()
if taufree_df.empty:
tau = trial.suggest_categorical(self.TAU, self.tau_candidates)
taufree_df = self.divide_minutes(tau)
taufree_df = self.divide_minutes(tau)
# Set parameters of the models
model_param_dict = self.model.param_range(
taufree_df, self.population)
Expand Down Expand Up @@ -217,24 +220,38 @@ def error_f(self, param_dict, taufree_df):
(float): score of the error function to minimize
"""
sim_df = self.simulate(self.step_n, param_dict)
df = self.compare(taufree_df, sim_df)
comp_df = self.compare(taufree_df, sim_df)
# Calculate error score
v_list = [
v for (p, v)
in zip(self.model.PRIORITIES, self.model.VARIABLES)
if p > 0
]
diffs = [df[f"{v}{self.A}"] - df[f"{v}{self.P}"] for v in v_list]
numerators = [df[f"{v}{self.A}"] + 1 for v in v_list]
try:
return sum(
p * np.average(diff.abs() / numerator, weights=df.index)
for (p, diff, numerator)
in zip(self.model.PRIORITIES, diffs, numerators)
self._score(variable, comp_df)
for variable in self.weight_dict.keys()
)
except (ZeroDivisionError, TypeError):
return np.inf

def _score(self, v, comp_df):
    """
    Calculate the weighted error score of one variable.

    Args:
        v (str): variable name (must be a key of self.weight_dict)
        comp_df (pandas.DataFrame):
            Index:
                (str): time step
            Columns:
                - columns with "_actual"
                - columns with "_predicted"
                - columns are defined by self.y_list

    Returns:
        float: weight multiplied by the mean of |actual - predicted| / (actual + 1)
    """
    # Weight of this variable in the total error function
    weight = self.weight_dict[v]
    actual = comp_df[f"{v}{self.A}"]
    # +1 in the denominator avoids division by zero when actual == 0
    diff = (actual - comp_df[f"{v}{self.P}"]).abs() / (actual + 1)
    return weight * diff.mean()

def simulate(self, step_n, param_dict):
"""
Simulate the values with the parameters.
Expand Down Expand Up @@ -350,7 +367,7 @@ def accuracy(self, show_figure=True, filename=None):
train_df = self.divide_minutes(est_dict[self.TAU])
use_variables = [
v for (i, (p, v))
in enumerate(zip(self.model.PRIORITIES, self.model.VARIABLES))
in enumerate(zip(self.model.WEIGHTS, self.model.VARIABLES))
if p != 0 and i != 0
]
return super().accuracy(
Expand Down
13 changes: 2 additions & 11 deletions covsirphy/simulation/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,15 @@ def compare(self, actual_df, predicted_df):
(pandas.DataFrame):
Index:
(str): time step
Index:
reset index
Columns:
- columns with "_actual"
- columns with "_predicted"
- columns are defined by self.y_list
"""
# Check the arguments
if not set(self.y_list).issubset(set(predicted_df.columns)):
y_str = ", ".join(self.y_list)
raise KeyError(f"@predicted_df must have {y_str} columns.")
# Data for comparison
df = pd.merge(
actual_df, predicted_df, on=self.x,
suffixes=(self.A, self.P)
)
df = df.set_index(self.x)
return df
actual_df, predicted_df, on=self.x, suffixes=(self.A, self.P))
return df.set_index(self.x)

def param(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/test_change_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_find_with_small_min_size(self, jhu_data, population_data):
def test_find_with_few_records(self, jhu_data, population_data):
population = population_data.value("Italy")
sr_df = jhu_data.to_sr(
country="Italy", population=population, end_date="23Feb2020")
country="Italy", population=population, end_date="24Feb2020")
with pytest.raises(ValueError):
min_size = 7
change_finder = ChangeFinder(sr_df, min_size=min_size)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_phase_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_add_phase(self, jhu_data, population_data, country):
# Add a phase with specified population value: 1st
with pytest.raises(ValueError):
series.add(end_date="22Apr2020")
series.add(end_date="05May2020", population=population * 0.98)
series.add(end_date="05May2020", population=int(population * 0.98))
# Add a phase with specified the number of days: 2nd
series.add(days=21)
# Filling past phases and add a future phase: 3rd, 4th
Expand Down
2 changes: 1 addition & 1 deletion tests/test_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_edit(self, jhu_data, population_data, country):
# Combine
length = len(snl["Main"])
snl.combine(["1st", "2nd"])
n_changed = population_data.value(country) * 0.98
n_changed = int(population_data.value(country) * 0.98)
snl.combine(["2nd", "3rd"], population=n_changed)
assert len(snl["Main"]) == length - 2
# Separate
Expand Down