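Fix some details in the DDG-DA example and the meta data-selection code.

- Cache the benchmark handler pickle per horizon, and rebuild it only when the file is missing.
- Seed torch from `seed` instead of the hard-coded 43.
- Log the proxy test segment from the task config and drop the debug-only `test_period` attribute.
- Translate or remove the remaining Chinese comments in net.py.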

microsoft · you-n-g · Jan 10, 2022 · Jul 1, 2021 · Jul 1, 2021 · Jul 8, 2021
commit 5a184ebd85edae995e9fc972d3473f2c1b83ddbe
diff --git a/examples/benchmarks_dynamic/DDG-DA/workflow.py b/examples/benchmarks_dynamic/DDG-DA/workflow.py
@@ -151,7 +151,7 @@ def train_meta_model(self):
         kwargs = dict(
             task_tpl=proxy_forecast_model_task,
             step=self.step,
-            segments=0.62,
+            segments=0.62,  # keep test period consistent with the dataset yaml
             trunc_days=1 + self.horizon,
             hist_step_n=30,
             fill_method="max",

diff --git a/examples/benchmarks_dynamic/baseline/rolling_benchmark.py b/examples/benchmarks_dynamic/baseline/rolling_benchmark.py
@@ -35,10 +35,10 @@ def basic_task(self):
         if self.model_type == "gbdt":
             conf_path = DIRNAME.parent.parent / "benchmarks" / "LightGBM" / "workflow_config_lightgbm_Alpha158.yaml"
             # dump the processed data on to disk for later loading to speed up the processing
-            h_path = DIRNAME / "lightgbm_alpha158_handler.pkl"
+            h_path = DIRNAME / "lightgbm_alpha158_handler_horizon{}.pkl".format(self.horizon)
         elif self.model_type == "linear":
             conf_path = DIRNAME.parent.parent / "benchmarks" / "Linear" / "workflow_config_linear_Alpha158.yaml"
-            h_path = DIRNAME / "linear_alpha158_handler.pkl"
+            h_path = DIRNAME / "linear_alpha158_handler_horizon{}.pkl".format(self.horizon)
         else:
             raise AssertionError("Model type is not supported!")
         with conf_path.open("r") as f:
@@ -51,10 +51,10 @@ def basic_task(self):
 
         task = conf["task"]
 
-        # if not h_path.exists():
-        h_conf = task["dataset"]["kwargs"]["handler"]
-        h = init_instance_by_config(h_conf)
-        h.to_pickle(h_path, dump_all=True)
+        if not h_path.exists():
+            h_conf = task["dataset"]["kwargs"]["handler"]
+            h = init_instance_by_config(h_conf)
+            h.to_pickle(h_path, dump_all=True)
 
         task["dataset"]["kwargs"]["handler"] = f"file://{h_path}"
         task["record"] = ["qlib.workflow.record_temp.SignalRecord"]

diff --git a/qlib/contrib/meta/data_selection/dataset.py b/qlib/contrib/meta/data_selection/dataset.py
@@ -177,11 +177,6 @@ def __init__(self, task: dict, meta_info: pd.DataFrame, mode: str = MetaTask.PRO
                 )
             )
 
-            # debug: record the test period of the current meta-task instance
-            self.test_period = (
-                d_test["feature"].index.get_level_values("datetime")[0],
-                d_test["feature"].index.get_level_values("datetime")[-1],
-            )
         # TODO: set device: I think this is not necessary to converting data format.
         self.processed_meta_input = data_to_tensor(self.processed_meta_input)
 

diff --git a/qlib/contrib/meta/data_selection/model.py b/qlib/contrib/meta/data_selection/model.py
@@ -64,7 +64,7 @@ def __init__(
         self.lr = lr
         self.max_epoch = max_epoch
         self.fitted = False
-        torch.manual_seed(43)
+        torch.manual_seed(seed)
 
     def run_epoch(self, phase, task_list, epoch, opt, loss_l, ignore_weight=False):
         if phase == "train":
@@ -144,7 +144,7 @@ def fit(self, meta_dataset: MetaDatasetDS):
 
         if len(meta_tasks_l[1]):
             R.log_params(
-                **dict(proxy_test_begin=meta_tasks_l[1][0].test_period)
+                **dict(proxy_test_begin=meta_tasks_l[1][0].task["dataset"]["kwargs"]["segments"]["test"])
             )  # debug: record when the test phase starts
 
         self.tn = PredNet(

diff --git a/qlib/contrib/meta/data_selection/net.py b/qlib/contrib/meta/data_selection/net.py
@@ -11,7 +11,7 @@
 
 class TimeWeightMeta(SingleMetaBase):
     def __init__(self, hist_step_n, clip_weight=None, clip_method="clamp"):
-        # method 可以选 tanh 或者 clamp
+        # clip_method can be either "tanh" or "clamp"
         super().__init__(hist_step_n, clip_weight, clip_method)
         self.linear = nn.Linear(hist_step_n, 1)
         self.k = nn.Parameter(torch.Tensor([8.0]))
@@ -22,13 +22,11 @@ def forward(self, time_perf, time_belong=None, return_preds=False):
         time_perf = time_perf.reshape(hist_step_n, time_perf.shape[0] // hist_step_n, *time_perf.shape[1:])
         time_perf = torch.mean(time_perf, dim=1, keepdim=False)
 
-        # time_perf的格式和其他的有一些不一样
-        # 需要自己拆出train和test
         preds = []
         for i in range(time_perf.shape[1]):
             preds.append(self.linear(time_perf[:, i]))
         preds = torch.cat(preds)
-        preds = preds - torch.mean(preds)  # 这里注意一下不要引入未来信息
+        preds = preds - torch.mean(preds)  # avoid using future information
         preds = preds * self.k
         if return_preds:
             if time_belong is None: