microsoft · peteryang1 · Jul 3, 2023 · Jul 3, 2023 · Jul 3, 2023
diff --git a/qlib/finco/llm.py b/qlib/finco/llm.py
@@ -62,7 +62,7 @@ def try_create_chat_completion(self, max_retry=10, **kwargs):
             try:
                 response = self.create_chat_completion(**kwargs)
                 return response
-            except (openai.error.RateLimitError, openai.error.Timeout) as e:
+            except (openai.error.RateLimitError, openai.error.Timeout, openai.error.APIError) as e:
                 print(e)
                 print(f"Retrying {i+1}th time...")
                 time.sleep(1)

diff --git a/qlib/finco/prompt_template.yaml b/qlib/finco/prompt_template.yaml
@@ -126,7 +126,9 @@ HyperparameterActionTask_system : |-
   The user has provided the requirements, chose the predefined classes and made plan and reason to each component. You should strictly follow user's choice and you should provide the reason of your hyperparameter choices if exist and some suggestion if the user wants to finetune the hyperparameters after the hyperparameter. 
 
   You only need to response the hyperparameters in the exact format in exsample below with no explanation or conversation. "Hyperparameters:", "Reason:", "Improve suggestion:" are key tags so always include them in response.
-  {% if target_module == "DataHandler" %}
+  {% if target_module == "Dataset" %}
+  Caution, if the user chose {qlib.data.dataset}-{DatasetH}, always remember to set hyperparameter: {segments}!
+  {% elif target_module == "DataHandler" %}
   Qlib has these processors {processor_name}-{hyperparameter kwargs}: 
   {DropnaProcessor}-{['fields_group']},{DropnaLabel}-{['fields_group']},{CSRankNorm}-{['fields_group']},{ProcessInf}-{[]},{Processor}-{[]},{MinMaxNorm}-{['fit_start_time', 'fit_end_time', 'fields_group']},{CSZFillna}-{['fields_group']},{TanhProcess}-{[]},{CSZScoreNorm}-{['fields_group', 'method']},{RobustZScoreNorm}-{['fit_start_time', 'fit_end_time', 'fields_group', 'clip_outlier']},{FilterCol}-{['fields_group', 'col_list']},{HashStockFormat}-{[]},{ZScoreNorm}-{['fit_start_time', 'fit_end_time', 'fields_group']},{DropCol}-{['col_list']},{Fillna}-{['fields_group', 'fill_value']}. 
   You can choose some of them to use in {infer_processors} or {learn_processors} if necessary and pick the kwargs of them.
@@ -342,6 +344,9 @@ ConfigActionTask_system: |-
   {%for module_path, class_name in classes%}{% raw %}{{% endraw %}{{module_path}}{% raw %}}{% endraw %}-{% raw %}{{% endraw %}{{class_name}}{% raw %}}{% endraw %}.{% endfor %} and you have decided all the hyperparameters.
   {% endif %}
 
+  The predefined classes and user's hint are hard requirements; you should copy them to your answer with no modification to avoid errors!
+  "```yaml(.*)" and "```" are key tags in response, always include them in your response!
+
   Default in user's hyperparameter means using default value in Qlib code. So always remember to avoid puting them in the config and delete this key in yaml string!!!
   You only output the target component part of the config, Don't output all the config file!!!
 
@@ -352,6 +357,7 @@ ConfigActionTask_system: |-
   4. each predefined class's hyperparameter to initialize the class
 
   You will response the YAML config with no explanation and interaction.
+  Most importantly, always make sure the YAML string in your response can be converted to a YAML object without any format issues!
 
   Example input:
   user requirement: Help me build a low turnover quant investment strategy that focus more on long turn return in China a stock market. I want to use a big LSTM model and add several MLP layer before the head.
@@ -441,7 +447,7 @@ ConfigActionTask_system: |-
   target component: Backtest
   {% endif %}
   Example output:
-  """yaml{% if target_module == "Dataset" %}
+  ```yaml{% if target_module == "Dataset" %}
   dataset:
     class: DatasetH
     module_path: qlib.data.dataset
@@ -511,7 +517,7 @@ ConfigActionTask_system: |-
         open_cost: 0.0005
         close_cost: 0.0015
         min_cost: 5
-  {% endif %}"""
+  {% endif %}```
 
 ConfigActionTask_user: |-
   user requirement: {{user_requirement}}

diff --git a/qlib/finco/task.py b/qlib/finco/task.py
@@ -262,8 +262,27 @@ def execute(self):
         if confirm is False:
             return []
 
-        command = f"qrun {workflow_path}"
-        self._output = subprocess.check_output(command, shell=True, cwd=workspace)
+        command = ["qrun", str(workflow_path)]
+        try:
+            # Run the command and capture the output
+            workspace = self._context_manager.get_context("workspace")
+            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True, cwd=str(workspace))
+
+        except subprocess.CalledProcessError as e:
+            print(f"An error occurred while running the subprocess: {e.stderr} {e.stdout}")
+            real_error = e.stderr+e.stdout
+            if "model" in  e.stdout.lower():
+                return [HyperparameterActionTask("Model", regenerate=True, error=real_error), ConfigActionTask("Model"), YamlEditTask("Model"), TrainTask()]
+            elif "dataset" in  e.stdout.lower() or "handler" in  e.stdout.lower():
+                return [HyperparameterActionTask("Dataset", regenerate=True, error=real_error), HyperparameterActionTask("DataHandler", regenerate=True, error=real_error), ConfigActionTask("Dataset"), ConfigActionTask("DataHandler"), YamlEditTask("Dataset"), YamlEditTask("DataHandler"), TrainTask()]
+            else:
+                ret_list = []
+                for component in COMPONENT_LIST:
+                    ret_list.append(HyperparameterActionTask(component, regenerate=True, error=real_error))
+                    ret_list.append(ConfigActionTask(component))
+                    ret_list.append(YamlEditTask(component))
+                ret_list.append(TrainTask())
+                return ret_list
 
         return [AnalysisTask()]
 
@@ -403,11 +422,12 @@ def user(self):
 
 
 class HyperparameterActionTask(ActionTask):
-    def __init__(self, component, regenerate=False, error=None) -> None:
+    def __init__(self, component, regenerate=False, error=None, error_type=None) -> None:
         super().__init__()
         self.target_component = component
         self.regenerate = regenerate
         self.error = error
+        self.error_type = error_type
 
     def execute(self):
         user_prompt = self._context_manager.get_context("user_prompt")
@@ -444,7 +464,10 @@ def execute(self):
         )
         former_messages = []
         if self.regenerate:
-            user_prompt = f"your hyperparameter cannot be initialized, may be caused by wrong format of the value or wrong name or some value is not supported in Qlib.\nPlease rewrite the hyperparameters and answer with exact required format in system prompt and reply with no more explainations.\nThe error message: {self.error}. Please correct the former answer accordingly.\nHyperparameters, Reason and Improve suggestion should always be included."
+            if self.error_type == "yaml":
+                user_prompt = f"your yaml config generated from your hyperparameter is not in the right format.\n The Yaml string generated from the hyperparameters is not in the right format.\nPlease rewrite the hyperparameters and answer with exact required format in system prompt and reply with no more explainations.\nThe error message: {self.error}. Please correct the former answer accordingly.\nHyperparameters, Reason and Improve suggestion should always be included."
+            else:
+                user_prompt = f"your hyperparameter cannot be initialized, may be caused by wrong format of the value or wrong name or some value is not supported in Qlib.\nPlease rewrite the hyperparameters and answer with exact required format in system prompt and reply with no more explainations.\nThe error message: {self.error}. Please correct the former answer accordingly.\nHyperparameters, Reason and Improve suggestion should always be included."
             former_messages = self._context_manager.get_context("chat_history")[self.__class__.__name__][self.target_component][1:]
         response = APIBackend().build_messages_and_create_chat_completion(
             user_prompt, system_prompt, former_messages=former_messages
@@ -472,11 +495,9 @@ def execute(self):
 
 
 class ConfigActionTask(ActionTask):
-    def __init__(self, component, reconfig=False, error=None) -> None:
+    def __init__(self, component) -> None:
         super().__init__()
         self.target_component = component
-        self.reconfig = reconfig
-        self.error = error
 
     def execute(self):
         user_prompt = self._context_manager.get_context("user_prompt")
@@ -494,9 +515,9 @@ def execute(self):
             target_component_hyperparameters=target_component_hyperparameters
         )
         former_messages = []
-        if self.reconfig and user_prompt == self._context_manager.get_context("chat_history")[self.__class__.__name__][self.target_component][-2]["content"]:
-            user_prompt = f"your config cannot be converted to YAML, may be caused by wrong format. Please rewrite the yaml and answer with exact required format in system prompt and reply with no more explainations.\nerror message: {self.error}\n"
-            former_messages = self._context_manager.get_context("chat_history")[self.__class__.__name__][self.target_component][1:]
+        # if self.reconfig and user_prompt == self._context_manager.get_context("chat_history")[self.__class__.__name__][self.target_component][-2]["content"]:
+        #     user_prompt = f"your config cannot be converted to YAML, may be caused by wrong format. Please rewrite the yaml and answer with exact required format in system prompt and reply with no more explainations.\nerror message: {self.error}\n"
+        #     former_messages = self._context_manager.get_context("chat_history")[self.__class__.__name__][self.target_component][1:]
         response = APIBackend().build_messages_and_create_chat_completion(
             user_prompt, system_prompt, former_messages=former_messages
         )
@@ -509,19 +530,26 @@ def execute(self):
             yaml_config = yaml.safe_load(io.StringIO(config))
         except yaml.YAMLError as e:
             self.logger.info(f"Yaml file is not in the correct format: {e}")
-            return_tasks = [HyperparameterActionTask(self.target_component, regenerate=True, error=str(e)),  ConfigActionTask(self.target_component, reconfig=True, error=str(e))]
+            return_tasks = [HyperparameterActionTask(self.target_component, regenerate=True, error=str(e), error_type="yaml"),  ConfigActionTask(self.target_component)]
             return return_tasks
 
-        if self.target_component == "DataHandler":
+        if self.target_component == "Dataset":
+            if 'handler' in yaml_config["dataset"]:
+                del yaml_config['dataset']['handler']
+        elif self.target_component == "DataHandler":
             for processor in yaml_config['handler']['kwargs']['infer_processors']:
                 if "kwargs" in processor and "fields_group" in processor["kwargs"]:
                     del processor["kwargs"]['fields_group']
             for processor in yaml_config['handler']['kwargs']['learn_processors']:
                 if "kwargs" in processor and "fields_group" in processor["kwargs"]:
                     del processor["kwargs"]['fields_group']
 
-            if 'freq' in yaml_config['handler']['kwargs'] and yaml_config['handler']['kwargs']['freq'] == '1d':
-                yaml_config['handler']['kwargs']['freq'] = "day"
+            if 'freq' in yaml_config['handler']['kwargs']:
+                yaml_config['handler']['kwargs']['freq'] = "day" # TODO hot fix freq because no data
+        elif self.target_component == "Record":
+            for record in yaml_config['record']:
+                if record['class'] == 'SigAnaRecord' and 'label_col' in record['kwargs']:
+                    del record['kwargs']["label_col"]
 
         def remove_default(config):
             if isinstance(config, dict):
@@ -688,12 +716,17 @@ def execute(self):
         else:
             real_target_config_key = self.target_config_key
 
-
-
         # 3) replace the module
         assert isinstance(update_config, dict) and real_target_config_key in update_config, "The config file is not in the correct format"
         assert self.replace_key_value_recursive(target_config, real_target_config_key, update_config[real_target_config_key]), "Replace of the yaml file failed."
-
+
+        # TODO: hotfix for the bug where the SignalRecord entry in the record config is not updated
+        for record in target_config['task']['record']:
+            if record['class'] == 'SignalRecord':
+                if 'model' in record['kwargs']:
+                    del record['kwargs']["model"]
+                if 'dataset' in record['kwargs']:
+                    del record['kwargs']["dataset"]
 
         # 4) save the config file
         with self.original_config_location.open("w") as f: