Skip to content

Optimize KnowledgeBase to complete workflow #1598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 76 additions & 31 deletions qlib/finco/knowledge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from qlib.workflow import R
from qlib.finco.log import FinCoLog
from qlib.finco.llm import APIBackend
from qlib.finco.utils import similarity, random_string
from qlib.finco.utils import similarity, random_string, SingletonBaseClass

logger = FinCoLog()

Expand Down Expand Up @@ -140,8 +140,10 @@ def summarize(self, **kwargs):
Return
------
"""
knowledge = []
for storage in self.storages:
self.knowledge.extend(storage.documents)
knowledge.extend(storage.documents)
self.knowledge = knowledge

@classmethod
def load(cls, path: Union[str, Path]):
Expand Down Expand Up @@ -212,12 +214,16 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):

self.summarize()

def add(self, docs: List):
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
storage.add(documents=docs)
self.storages.append(storage)
self.summarize()
def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
storage = self.get_storage(storage_name)
if storage is None:
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
storage.add(documents=docs)
self.storages.append(storage)
else:
storage.add(documents=docs)

self.summarize()
self.save()


Expand All @@ -232,18 +238,27 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
storage = self.get_storage(YamlStorage.DEFAULT_NAME)
if len(storage.documents) == 0:
docs = self.read_files_in_directory(self.workdir.joinpath(self.name))
docs.extend([
{"content": "[Success]: XXXX, the results looks reasonable # Keywords: supervised learning, data"},
{"content": "[Fail]: XXXX, it raise memory error due to YYYYY "
"# Keywords: supervised learning, data"}])
self.add(docs)

def add(self, docs: List):
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
storage.add(documents=docs)
self.storages.append(storage)
self.summarize()

def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
storage = self.get_storage(storage_name)
if storage is None:
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
storage.add(documents=docs)
self.storages.append(storage)
else:
storage.add(documents=docs)

self.summarize()
self.save()

@staticmethod
def read_files_in_directory(directory):
def read_files_in_directory(directory) -> List:
"""
read all .txt files under directory
"""
Expand All @@ -265,12 +280,24 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
super().__init__(storages=storages, name="execute")
self.summarize()

def add(self, docs: List):
storage = YamlStorage(path=self.workdir.joinpath(YamlStorage.DEFAULT_NAME))
storage.add(documents=docs)
self.storages.append(storage)
storage = self.get_storage(YamlStorage.DEFAULT_NAME)
if len(storage.documents) == 0:
docs = [{"content": "[Success]: XXXX, the results looks reasonable # Keywords: supervised learning, data"},
{"content": "[Fail]: XXXX, it raise memory error due to YYYYY "
"# Keywords: supervised learning, data"}]
self.add(docs)
self.summarize()

def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
storage = self.get_storage(storage_name)
if storage is None:
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
storage.add(documents=docs)
self.storages.append(storage)
else:
storage.add(documents=docs)

self.summarize()
self.save()


Expand All @@ -285,17 +312,26 @@ def __init__(self, storages: Union[List[YamlStorage], YamlStorage]):
storage = self.get_storage(YamlStorage.DEFAULT_NAME)
if len(storage.documents) == 0:
docs = self.get_functions_and_docstrings(Path(__file__).parent.parent.parent)
docs.extend([{"docstring": "All the models can be import from `qlib.contrib.models` "
"# Keywords: supervised learning"},
{"docstring": "The API to run rolling models can be found in … #Keywords: control"},
{"docstring": "Here are a list of Qlib’s available analyzers. #KEYWORDS: analysis"}])
self.add(docs)

def add(self, docs: List):
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(YamlStorage.DEFAULT_NAME))
storage.add(documents=docs)
self.storages.append(storage)
self.summarize()

def add(self, docs: List, storage_name: str = YamlStorage.DEFAULT_NAME):
storage = self.get_storage(storage_name)
if storage is None:
storage = YamlStorage(path=self.workdir.joinpath(self.name).joinpath(storage_name))
storage.add(documents=docs)
self.storages.append(storage)
else:
storage.add(documents=docs)

self.summarize()
self.save()

def get_functions_and_docstrings(self, directory):
def get_functions_and_docstrings(self, directory) -> List:
"""
get all functions and their docstrings from the .py files under directory

Expand Down Expand Up @@ -350,15 +386,16 @@ def __init__(self, name: str, describe: Template):
self.logger = FinCoLog()

def summarize(self, docs: list):
self.logger.info(f"Summarize topic: \nname: {self.name}\ndescribe: {self.describe.module}")
self.logger.info(f"Summarize Topic \nname: {self.name}\ndescribe: {self.describe.module}")
prompt_workflow_selection = self.describe.render(docs=docs)
response = APIBackend().build_messages_and_create_chat_completion(user_prompt=prompt_workflow_selection)

self.knowledge = response
self.docs = docs
self.logger.info(f"Summary of {self.name}:\n{self.knowledge}")


class KnowledgeBase:
class KnowledgeBase(SingletonBaseClass):
"""
Load knowledge, offer a brief summary of the knowledge, and provide common interfaces for handling it
"""
Expand Down Expand Up @@ -431,10 +468,10 @@ def get_knowledge(self, knowledge_type: str = None):
knowledge = self.infrastructure_knowledge.knowledge
else:
knowledge = (
self.execute_knowledge.knowledge
+ self.practice_knowledge.knowledge
+ self.finance_knowledge.knowledge
+ self.infrastructure_knowledge.knowledge
self.execute_knowledge.knowledge
+ self.practice_knowledge.knowledge
+ self.finance_knowledge.knowledge
+ self.infrastructure_knowledge.knowledge
)
return knowledge

Expand All @@ -461,12 +498,20 @@ def query(self, knowledge_type: str = None, content: str = None, n: int = 5):
similar_n_docs = [knowledge[i] for i in similar_n_indexes]

prompt = Template(
"""find the most relevant doc with this query: '{{content}}' from docs='{{docs}}'.
Just return the most relevant item I provided, no more explain. For example: {'function': 'config.resolve_path', 'docstring': None}"""
"""find the most relevant doc with this query: '{{content}}'
from docs='{{docs}}. Just return the most relevant item I provided, no more explain.
For example:
user: find the most relevant doc with this query: ab \n from docs = {abc, xyz, lmn}.
response: abc
"""
)
prompt_workflow_selection = prompt.render(content=content, docs=similar_n_docs)
response = APIBackend().build_messages_and_create_chat_completion(
user_prompt=prompt_workflow_selection, system_prompt="You are an excellent assistant."
)

return response


# perhaps initialize KnowledgeBase somewhere else
KnowledgeBase(workdir=Path.cwd().joinpath('knowledge'))
9 changes: 6 additions & 3 deletions qlib/finco/prompt_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ SummarizeTask_context_user : |-
Here is my information: '{{key}}:{{value}}'

SummarizeTask_metrics_system : |-
Your purpose is to summarize the information by metrics in markdown format.
Your purpose is to summarize the information by metrics in markdown format. If possible, try to display data in percentages.

SummarizeTask_metrics_user : |-
Here is my information: '{{information}}'
Expand All @@ -1012,7 +1012,10 @@ LearnManager_user : |-
you will adjust {{task}}'s system prompt to:

Topic_IC : |-
Summarize the influence of parameters on IC: {{docs}}
Summarize the influence of parameters on IC: {{docs}}. (Example response: Max draw-down become larger over time)

Topic_MaxDropDown : |-
Summarize the influence of parameters on max dropdown: {{docs}}
Summarize the influence of parameters on max dropdown: {{docs}}. (Example response: Max draw-down become larger over time)

Topic_RollingModel : |-
What conclusion can you draw from: {{docs}}. Answer questions as concisely as possible. (Example response: rolling model is good at making the Max draw-down smaller.)
Loading