From f0cd1dfd3b6bc3e08ec5d899d7c1a5eac63d62e0 Mon Sep 17 00:00:00 2001
From: you-n-g <you-n-g@users.noreply.github.com>
Date: Mon, 17 Jan 2022 13:57:44 +0800
Subject: [PATCH] Fix code and docs for issues (#853)

* Docs for model and strategy

* add some docs about workflow and online

* safe_load yaml

* DDG-DA paper link and comments for code
---
 docs/component/model.rst                      |  3 ++
 docs/component/online.rst                     |  4 ++
 docs/component/strategy.rst                   |  2 +
 docs/component/workflow.rst                   | 40 ++++++++++++++++++-
 examples/benchmarks_dynamic/DDG-DA/README.md  |  3 ++
 .../benchmarks_dynamic/DDG-DA/workflow.py     |  3 ++
 examples/run_all_model.py                     |  2 +-
 7 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/docs/component/model.rst b/docs/component/model.rst
index d9d59a4d70..61948bc2bb 100644
--- a/docs/component/model.rst
+++ b/docs/component/model.rst
@@ -106,6 +106,9 @@ Example
         `SignalRecord` is the `Record Template` in ``Qlib``, please refer to `Workflow <recorder.html#record-template>`_.
 
 Also, the above example has been given in ``examples/train_backtest_analyze.ipynb``.
+Technically, the meaning of the model prediction depends on the label setting designed by the user.
+By default, the score is normally the forecasting model's rating of the instruments. The higher the score, the more profitable the instrument is expected to be.
+
 
 Custom Model
 ===================
diff --git a/docs/component/online.rst b/docs/component/online.rst
index 22a6afaf9c..a77045b530 100644
--- a/docs/component/online.rst
+++ b/docs/component/online.rst
@@ -23,6 +23,10 @@ The `examples <https://github.com/microsoft/qlib/tree/main/examples/online_srv>`
 
 **NOTE**: User should keep his data source updated to support online serving. For example, Qlib provides `a batch of scripts <https://github.com/microsoft/qlib/blob/main/scripts/data_collector/yahoo/README.md#automatic-update-of-daily-frequency-datafrom-yahoo-finance>`_ to help users update Yahoo daily data.
 
+Known limitations:
+
+- Currently, updating predictions daily for the next trading day is supported, but generating orders for the next trading day is not supported due to the `limitations of public data <https://github.com/microsoft/qlib/issues/215#issuecomment-766293563>`_.
+
 Online Manager
 =============
 
diff --git a/docs/component/strategy.rst b/docs/component/strategy.rst
index 3e379e8fcd..1b27e8e2c2 100644
--- a/docs/component/strategy.rst
+++ b/docs/component/strategy.rst
@@ -29,6 +29,8 @@ Qlib provides a base class ``qlib.strategy.base.BaseStrategy``. All strategy cla
 
 - `generate_order_list`
     Return the order list.
+    How often this method is called depends on the executor frequency ("time_per_step"="day" by default), but the trading frequency can be decided by the user's implementation.
+    For example, if the user wants to trade weekly while the `time_per_step` is "day" in the executor, the user can return a non-empty TradeDecision weekly (and otherwise return an empty one, like `this <https://github.com/microsoft/qlib/blob/main/qlib/contrib/strategy/signal_strategy.py#L132>`_ ).
 
 Users can inherit `BaseStrategy` to customize their strategy class.
 
diff --git a/docs/component/workflow.rst b/docs/component/workflow.rst
index 1b15212acf..af28f5264c 100644
--- a/docs/component/workflow.rst
+++ b/docs/component/workflow.rst
@@ -124,9 +124,47 @@ Configuration File
 ===================
 
 Let's get into details of ``qrun`` in this section.
-
 Before using ``qrun``, users need to prepare a configuration file. The following content shows how to prepare each part of the configuration file.
 
+The design logic of the configuration file is very simple. It predefines fixed workflows and provides this yaml interface for users to define how to initialize each component.
+It follows the design of `init_instance_by_config <https://github.com/microsoft/qlib/blob/2aee9e0145decc3e71def70909639b5e5a6f4b58/qlib/utils/__init__.py#L264>`_ . It defines the initialization of each component of Qlib, which typically includes the class and the initialization arguments.
+
+For example, the following yaml and code are equivalent.
+
+.. code-block:: YAML
+
+    model:
+        class: LGBModel
+        module_path: qlib.contrib.model.gbdt
+        kwargs:
+            loss: mse
+            colsample_bytree: 0.8879
+            learning_rate: 0.0421
+            subsample: 0.8789
+            lambda_l1: 205.6999
+            lambda_l2: 580.9768
+            max_depth: 8
+            num_leaves: 210
+            num_threads: 20
+
+
+.. code-block:: python
+
+        from qlib.contrib.model.gbdt import LGBModel
+        kwargs = {
+            "loss": "mse" ,
+            "colsample_bytree": 0.8879,
+            "learning_rate": 0.0421,
+            "subsample": 0.8789,
+            "lambda_l1": 205.6999,
+            "lambda_l2": 580.9768,
+            "max_depth": 8,
+            "num_leaves": 210,
+            "num_threads": 20,
+        }
+        LGBModel(**kwargs)
+
+
 Qlib Init Section
 --------------------
 
diff --git a/examples/benchmarks_dynamic/DDG-DA/README.md b/examples/benchmarks_dynamic/DDG-DA/README.md
index 8e8c76b56f..e113c7e937 100644
--- a/examples/benchmarks_dynamic/DDG-DA/README.md
+++ b/examples/benchmarks_dynamic/DDG-DA/README.md
@@ -1,6 +1,9 @@
 # Introduction
 This is the implementation of `DDG-DA` based on `Meta Controller` component provided by `Qlib`.
 
+Please refer to the paper for more details: *DDG-DA: Data Distribution Generation for Predictable Concept Drift Adaptation* [[arXiv](https://arxiv.org/abs/2201.04038)]
+
+
 ## Background
 In many real-world scenarios, we often deal with streaming data that is sequentially collected over time. Due to the non-stationary nature of the environment, the streaming data distribution may change in unpredictable ways, which is known as concept drift. To handle concept drift, previous methods first detect when/where the concept drift happens and then adapt models to fit the distribution of the latest data. However, there are still many cases that some underlying factors of environment evolution are predictable, making it possible to model the future concept drift trend of the streaming data, while such cases are not fully explored in previous work.
 
diff --git a/examples/benchmarks_dynamic/DDG-DA/workflow.py b/examples/benchmarks_dynamic/DDG-DA/workflow.py
index e6f5df46d2..f7acac075b 100644
--- a/examples/benchmarks_dynamic/DDG-DA/workflow.py
+++ b/examples/benchmarks_dynamic/DDG-DA/workflow.py
@@ -147,6 +147,9 @@ def train_meta_model(self):
             },
             # "record": ["qlib.workflow.record_temp.SignalRecord"]
         }
+        # the proxy_forecast_model_task will be used to create meta tasks.
+        # The test date of the first task will be 2011-01-01. Each test segment will be about 20 days.
+        # The tasks include all training tasks and test tasks.
 
         # 2) preparing meta dataset
         kwargs = dict(
diff --git a/examples/run_all_model.py b/examples/run_all_model.py
index 4451cafaab..71ce10a411 100644
--- a/examples/run_all_model.py
+++ b/examples/run_all_model.py
@@ -186,7 +186,7 @@ def gen_and_save_md_table(metrics, dataset):
 # read yaml, remove seed kwargs of model, and then save file in the temp_dir
 def gen_yaml_file_without_seed_kwargs(yaml_path, temp_dir):
     with open(yaml_path, "r") as fp:
-        config = yaml.load(fp)
+        config = yaml.safe_load(fp)
     try:
         del config["task"]["model"]["kwargs"]["seed"]
     except KeyError: