microsoft · Fivele-Li · Jul 17, 2023 · Jul 16, 2023 · Jul 17, 2023 · Jul 17, 2023
diff --git a/qlib/finco/knowledge.py b/qlib/finco/knowledge.py
@@ -7,7 +7,7 @@
 from qlib.workflow import R
 from qlib.finco.log import FinCoLog
 from qlib.finco.llm import APIBackend
-from qlib.finco.utils import similarity, random_string
+from qlib.finco.utils import similarity, random_string, SingletonBaseClass
 
 logger = FinCoLog()
 
@@ -140,8 +140,10 @@ def summarize(self, **kwargs):
         Return
         ------
         """
+        knowledge = []
         for storage in self.storages:
-            self.knowledge.extend(storage.documents)
+            knowledge.extend(storage.documents)
+        self.knowledge = knowledge
 
     @classmethod
     def load(cls, path: Union[str, Path]):
@@ -212,12 +214,16 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
 
         self.summarize()
 
-    def add(self, docs: List):
-        storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
-        storage.add(documents=docs)
-        self.storages.append(storage)
-        self.summarize()
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
 
+        self.summarize()
         self.save()
 
 
@@ -232,18 +238,27 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
         storage = self.get_storage(YamlStorage.DEFAULT_NAME)
         if len(storage.documents) == 0:
             docs = self.read_files_in_directory(self.workdir.joinpath(self.name))
+            docs.extend([
+                {"content": "[Success]: XXXX, the results looks reasonable  # Keywords: supervised learning, data"},
+                {"content": "[Fail]: XXXX, it raise memory error due to  YYYYY  "
+                            "# Keywords: supervised learning, data"}])
             self.add(docs)
-
-    def add(self, docs: List):
-        storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
-        storage.add(documents=docs)
-        self.storages.append(storage)
         self.summarize()
 
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
         self.save()
 
     @staticmethod
-    def read_files_in_directory(directory):
+    def read_files_in_directory(directory) -> List:
         """
         read all .txt files under directory
         """
@@ -265,12 +280,24 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
         super().__init__(storages=storages, name="execute")
         self.summarize()
 
-    def add(self, docs: List):
-        storage = YamlStorage(path=self.workdir.joinpath(YamlStorage.DEFAULT_NAME))
-        storage.add(documents=docs)
-        self.storages.append(storage)
+        storage = self.get_storage(YamlStorage.DEFAULT_NAME)
+        if len(storage.documents) == 0:
+            docs = [{"content": "[Success]: XXXX, the results looks reasonable  # Keywords: supervised learning, data"},
+                    {"content": "[Fail]: XXXX, it raise memory error due to  YYYYY  "
+                                "# Keywords: supervised learning, data"}]
+            self.add(docs)
         self.summarize()
 
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
         self.save()
 
 
@@ -285,17 +312,26 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
         storage = self.get_storage(YamlStorage.DEFAULT_NAME)
         if len(storage.documents) == 0:
             docs = self.get_functions_and_docstrings(Path(__file__).parent.parent.parent)
+            docs.extend([{"docstring": "All the models can be import from `qlib.contrib.models`  "
+                                       "# Keywords: supervised learning"},
+                         {"docstring": "The API to run rolling models can be found in …   #Keywords: control"},
+                         {"docstring": "Here are a list of Qlib’s available analyzers.    #KEYWORDS: analysis"}])
             self.add(docs)
-
-    def add(self, docs: List):
-        storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
-        storage.add(documents=docs)
-        self.storages.append(storage)
         self.summarize()
 
+    def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
+        storage = self.get_storage(storage_name)
+        if storage is None:
+            storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
+            storage.add(documents=docs)
+            self.storages.append(storage)
+        else:
+            storage.add(documents=docs)
+
+        self.summarize()
         self.save()
 
-    def get_functions_and_docstrings(self, directory):
+    def get_functions_and_docstrings(self, directory) -> List:
         """
         get all method and docstring in .py files under directory
 
@@ -350,15 +386,16 @@ def __init__(self, name: str, describe: Template):
         self.logger = FinCoLog()
 
     def summarize(self, docs: list):
-        self.logger.info(f"Summarize topic: \nname: {self.name}\ndescribe: {self.describe.module}")
+        self.logger.info(f"Summarize Topic \nname: {self.name}\ndescribe: {self.describe.module}")
         prompt_workflow_selection = self.describe.render(docs=docs)
         response = APIBackend().build_messages_and_create_chat_completion(user_prompt=prompt_workflow_selection)
 
         self.knowledge = response
         self.docs = docs
+        self.logger.info(f"Summary of {self.name}:\n{self.knowledge}")
 
 
-class KnowledgeBase:
+class KnowledgeBase(SingletonBaseClass):
     """
     Load knowledge, offer brief information of knowledge and common handle interfaces
     """
@@ -431,10 +468,10 @@ def get_knowledge(self, knowledge_type: str = None):
             knowledge = self.infrastructure_knowledge.knowledge
         else:
             knowledge = (
-                self.execute_knowledge.knowledge
-                + self.practice_knowledge.knowledge
-                + self.finance_knowledge.knowledge
-                + self.infrastructure_knowledge.knowledge
+                    self.execute_knowledge.knowledge
+                    + self.practice_knowledge.knowledge
+                    + self.finance_knowledge.knowledge
+                    + self.infrastructure_knowledge.knowledge
             )
         return knowledge
 
@@ -461,12 +498,20 @@ def query(self, knowledge_type: str = None, content: str = None, n: int = 5):
         similar_n_docs = [knowledge[i] for i in similar_n_indexes]
 
         prompt = Template(
-            """find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'.
-            Just return the most relevant item I provided, no more explain. For example: {'function': 'config.resolve_path', 'docstring': None}"""
+            """find the most relevant doc with this query: '{{content}}' 
+            from docs='{{docs}}'. Just return the most relevant item I provided, no more explain. 
+            For example: 
+            user: find the most relevant doc with this query: ab \n from docs = {abc, xyz, lmn}.
+            response: abc
+            """
         )
         prompt_workflow_selection = prompt.render(content=content, docs=similar_n_docs)
         response = APIBackend().build_messages_and_create_chat_completion(
             user_prompt=prompt_workflow_selection, system_prompt="You are an excellent assistant."
         )
 
         return response
+
+
+# TODO: consider initializing KnowledgeBase somewhere else instead of at import time
+KnowledgeBase(workdir=Path.cwd().joinpath('knowledge'))
diff --git a/qlib/finco/prompt_template.yaml b/qlib/finco/prompt_template.yaml
@@ -993,7 +993,7 @@ SummarizeTask_context_user : |-
   Here is my information: '{{key}}:{{value}}'
 
 SummarizeTask_metrics_system : |-
-  Your purpose is to summarize the information by metrics in markdown format.
+  Your purpose is to summarize the information by metrics in markdown format. If possible, try to display data in percentages.
 
 SummarizeTask_metrics_user : |-
   Here is my information: '{{information}}'
@@ -1012,7 +1012,10 @@ LearnManager_user : |-
   you will adjust {{task}}'s system prompt to:
 
 Topic_IC : |-
-  Summarize the influence of parameters on IC: {{docs}}
+  Summarize the influence of parameters on IC: {{docs}}. (Example response: IC becomes larger over time)
 
 Topic_MaxDropDown : |-
-  Summarize the influence of parameters on max dropdown: {{docs}}
+  Summarize the influence of parameters on max draw-down: {{docs}}. (Example response: Max draw-down becomes larger over time)
+
+Topic_RollingModel : |-
+  What conclusions can you draw from: {{docs}}? Answer as concisely as possible. (Example response: The rolling model is good at making the max draw-down smaller.)