Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue196 #197

Merged
merged 8 commits into from
Aug 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions covsirphy/cleaning/jhu_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,17 @@ def subset(self, country, province=None,
Records with Recovered > 0 will be selected.
"""
# Subset with area and start/end date
df = super().subset(
subset_df = super().subset(
country=country, province=province, start_date=start_date, end_date=end_date)
# Select records where Recovered > 0
df = df.loc[df[self.R] > 0, :]
df = subset_df.loc[subset_df[self.R] > 0, :]
if df.empty:
series = subset_df[self.DATE]
start_date = start_date or series.min().strftime(self.DATE_FORMAT)
end_date = end_date or series.max().strftime(self.DATE_FORMAT)
s1 = "Records with Recovered > 0 are not registered."
s2 = f"(country={country}, province={province}, period={start_date}-{end_date})"
raise ValueError(
f"{s1} {s2}"
)
raise ValueError(f"{s1} {s2}")
# Calculate Susceptible if population value was applied
if population is None:
return df
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/mbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class ModelBase(Term):
# Variable names in (non-dim, dimensional) ODEs
VAR_DICT = dict()
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array(list())
# Weights of variables in parameter estimation error function
WEIGHTS = np.array(list())
# Variables that increases monotonically
VARS_INCLEASE = list()
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sewirf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class SEWIRF(ModelBase):
"x3": ModelBase.W,
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([0, 10, 10, 2, 0, 0])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([0, 10, 10, 2, 0, 0])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sir.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ class SIR(ModelBase):
"z": ModelBase.FR
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 1, 1])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 1, 1])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.FR]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sird.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class SIRD(ModelBase):
"w": ModelBase.F
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 10, 10, 2])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 10, 10, 2])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sirf.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ class SIRF(ModelBase):
"w": ModelBase.F
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([1, 10, 10, 2])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([1, 1, 1, 1])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
4 changes: 2 additions & 2 deletions covsirphy/ode/sirfv.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ class SIRFV(ModelBase):
"v": ModelBase.V
}
VARIABLES = list(VAR_DICT.values())
# Priorities of the variables when optimization
PRIORITIES = np.array([0, 10, 10, 2, 0])
# Weights of variables in parameter estimation error function
WEIGHTS = np.array([0, 10, 10, 2, 0])
# Variables that increases monotonically
VARS_INCLEASE = [ModelBase.R, ModelBase.F]
# Example set of parameters and initial values
Expand Down
57 changes: 37 additions & 20 deletions covsirphy/simulation/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
# Arguments
self.population = self.ensure_population(population)
self.model = self.ensure_subclass(model, ModelBase, name="model")
self.tau = self.ensure_tau(tau)
# Dataset
if isinstance(record_df, JHUData):
subset_arg_dict = find_args(
Expand All @@ -66,6 +65,9 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
optuna.logging.disable_default_handler()
self.x = self.TS
self.y_list = model.VARIABLES[:]
self.weight_dict = {
v: p for (v, p) in zip(model.VARIABLES, model.WEIGHTS) if p > 0
}
self.study = None
self.total_trials = 0
self.run_time = 0
Expand All @@ -74,6 +76,9 @@ def __init__(self, record_df, model, population, tau=None, **kwargs):
self.train_df = None
# step_n will be defined in divide_minutes()
self.step_n = None
# tau value
self.tau = self.ensure_tau(tau)
self.taufree_df = pd.DataFrame() if tau is None else self.divide_minutes(tau)

def run(self, timeout=60, reset_n_max=3,
timeout_iteration=5, allowance=(0.98, 1.02), seed=0, **kwargs):
Expand Down Expand Up @@ -140,10 +145,8 @@ def _is_in_allowance(self, comp_df, allowance):
Returns:
(bool): True when all max values of predicted values are in allowance
"""
df = self.ensure_dataframe(comp_df, name="comp_df")
variables = self.model.VARIABLES[:]
a_max_values = [df[f"{v}{self.A}"].max() for v in variables]
p_max_values = [df[f"{v}{self.P}"].max() for v in variables]
a_max_values = [comp_df[f"{v}{self.A}"].max() for v in self.y_list]
p_max_values = [comp_df[f"{v}{self.P}"].max() for v in self.y_list]
allowance0, allowance1 = allowance
ok_list = [
(a * allowance0 <= p) and (p <= a * allowance1)
Expand All @@ -163,10 +166,10 @@ def objective(self, trial):
(float): score of the error function to minimize
"""
# Convert T to t using tau
tau = self.tau
if tau is None:
taufree_df = self.taufree_df.copy()
if taufree_df.empty:
tau = trial.suggest_categorical(self.TAU, self.tau_candidates)
taufree_df = self.divide_minutes(tau)
taufree_df = self.divide_minutes(tau)
# Set parameters of the models
model_param_dict = self.model.param_range(
taufree_df, self.population)
Expand Down Expand Up @@ -217,24 +220,38 @@ def error_f(self, param_dict, taufree_df):
(float): score of the error function to minimize
"""
sim_df = self.simulate(self.step_n, param_dict)
df = self.compare(taufree_df, sim_df)
comp_df = self.compare(taufree_df, sim_df)
# Calculate error score
v_list = [
v for (p, v)
in zip(self.model.PRIORITIES, self.model.VARIABLES)
if p > 0
]
diffs = [df[f"{v}{self.A}"] - df[f"{v}{self.P}"] for v in v_list]
numerators = [df[f"{v}{self.A}"] + 1 for v in v_list]
try:
return sum(
p * np.average(diff.abs() / numerator, weights=df.index)
for (p, diff, numerator)
in zip(self.model.PRIORITIES, diffs, numerators)
self._score(variable, comp_df)
for variable in self.weight_dict.keys()
)
except (ZeroDivisionError, TypeError):
return np.inf

def _score(self, v, comp_df):
    """
    Calculate the weighted error score of one variable.

    Args:
        v (str): variable name (must be a key of self.weight_dict)
        comp_df (pandas.DataFrame):
            Index:
                (str): time step
            Columns:
                - columns with "_actual"
                - columns with "_predicted"
                - columns are defined by self.y_list

    Returns:
        float: weight multiplied by the mean of |actual - predicted| / (actual + 1)
    """
    # Weight of this variable in the total error function
    weight = self.weight_dict[v]
    actual = comp_df[f"{v}{self.A}"]
    # +1 in the denominator avoids division by zero when actual == 0
    diff = (actual - comp_df[f"{v}{self.P}"]).abs() / (actual + 1)
    return weight * diff.mean()

def simulate(self, step_n, param_dict):
"""
Simulate the values with the parameters.
Expand Down Expand Up @@ -350,7 +367,7 @@ def accuracy(self, show_figure=True, filename=None):
train_df = self.divide_minutes(est_dict[self.TAU])
use_variables = [
v for (i, (p, v))
in enumerate(zip(self.model.PRIORITIES, self.model.VARIABLES))
in enumerate(zip(self.model.WEIGHTS, self.model.VARIABLES))
if p != 0 and i != 0
]
return super().accuracy(
Expand Down
13 changes: 2 additions & 11 deletions covsirphy/simulation/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,15 @@ def compare(self, actual_df, predicted_df):
(pandas.DataFrame):
Index:
(str): time step
Index:
reset index
Columns:
- columns with "_actual"
- columns with "_predicted"
- columns are defined by self.y_list
"""
# Check the arguments
if not set(self.y_list).issubset(set(predicted_df.columns)):
y_str = ", ".join(self.y_list)
raise KeyError(f"@predicted_df must have {y_str} columns.")
# Data for comparison
df = pd.merge(
actual_df, predicted_df, on=self.x,
suffixes=(self.A, self.P)
)
df = df.set_index(self.x)
return df
actual_df, predicted_df, on=self.x, suffixes=(self.A, self.P))
return df.set_index(self.x)

def param(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/test_change_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_find_with_small_min_size(self, jhu_data, population_data):
def test_find_with_few_records(self, jhu_data, population_data):
population = population_data.value("Italy")
sr_df = jhu_data.to_sr(
country="Italy", population=population, end_date="23Feb2020")
country="Italy", population=population, end_date="24Feb2020")
with pytest.raises(ValueError):
min_size = 7
change_finder = ChangeFinder(sr_df, min_size=min_size)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_phase_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_add_phase(self, jhu_data, population_data, country):
# Add a phase with specified population value: 1st
with pytest.raises(ValueError):
series.add(end_date="22Apr2020")
series.add(end_date="05May2020", population=population * 0.98)
series.add(end_date="05May2020", population=int(population * 0.98))
# Add a phase with specified the number of days: 2nd
series.add(days=21)
# Filling past phases and add a future phase: 3rd, 4th
Expand Down
2 changes: 1 addition & 1 deletion tests/test_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def test_edit(self, jhu_data, population_data, country):
# Combine
length = len(snl["Main"])
snl.combine(["1st", "2nd"])
n_changed = population_data.value(country) * 0.98
n_changed = int(population_data.value(country) * 0.98)
snl.combine(["2nd", "3rd"], population=n_changed)
assert len(snl["Main"]) == length - 2
# Separate
Expand Down