From 2af372179ebd3fb1fc41af5fdd23a820436aa84e Mon Sep 17 00:00:00 2001 From: eren23 Date: Sat, 25 Mar 2023 16:57:55 +0100 Subject: [PATCH] prompt template added --- examples/prompt_template_example.ipynb | 571 ++++++++++++++++++++++ knowledgegpt/extractors/base_extractor.py | 6 +- knowledgegpt/utils/utils_completion.py | 6 +- knowledgegpt/utils/utils_prompt.py | 16 +- 4 files changed, 589 insertions(+), 10 deletions(-) create mode 100644 examples/prompt_template_example.ipynb diff --git a/examples/prompt_template_example.ipynb b/examples/prompt_template_example.ipynb new file mode 100644 index 0000000..f371af5 --- /dev/null +++ b/examples/prompt_template_example.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# !python3 -m spacy download en_core_web_sm" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from knowledgegpt.extractors.base_extractor import BaseExtractor\n", + "from knowledgegpt.utils.utils_scrape import scrape_content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import openai\n", + "from example_config import SECRET_KEY\n", + "openai.api_key = SECRET_KEY" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# # Scrape content from a webpage\n", + "# any other dataframes can be used as well, the only requirement is that the column name is \"content\"\n", + "df = scrape_content(\"https://en.wikipedia.org/wiki/Bombard_(weapon)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
content
0Bombard (weapon) - Wikipedia
1Open main menu
2Home
3Random
4Nearby
5Log in
6Settings
7Donate
8About Wikipedia
9Disclaimers
10Search
11Bombard (weapon)
12Article\\nTalk
13Language
14Watch
15Edit
16The bombard is a type of cannon or mortar whic...
17Contents\\n1 Terminology\\n2 History\\n3 Notable ...
18TerminologyEdit\\nThe term \"bombard\" was first ...
19GalleryEdit
20\"Hand bombard\", 1390–1400
21200 kg wrought iron bombard, circa 1450, Metz,...
22The Dardanelles Gun.
23Mons Meg at Edinburgh Castle, mid-15th century
24Mons Meg cannonballs
25English Bombards abandoned during the Hundred ...
26Bombard from the beginning of the 15th century...
27Early Ming bombard with two pair of trunnions,...
28Acehnese guns including two bombards (closer t...
2915th century bombard mounted on carriage, Warsaw.
30Bombard in its siege position, Denmark.
31A bombard recovered from the well of Cardiff c...
32Wikimedia Commons has media related to Bombard...
33See alsoEdit\\nList of the largest cannons by c...
34^ a b c Sands, Kathleen (1999). \"Though One Of...
35^ Andrade 2016, p. 83.
36^ DeVries 2012, p. 155.
37^ Gwei-Djen, Lu; Needham, Joseph; Chi-Hsing, P...
38^ Aung-Thwin, Michael; Hall, Kenneth (2011). \"...
39^ File:Westgate 076.jpg
40^ \"Reconstruction of Norham Castle Seige by Sc...
41^ W. H. Finlayson. The Scottish Historical Rev...
42^ \"Bodiam Bombard | Kent and Sussex Courier\". ...
43^ Schmidtchen (1977b), pp. 226–228
44ReferencesEdit\\n  This article incorporates te...
45Retrieved from \"https://en.wikipedia.org/w/ind...
46Last edited on 25 February 2023, at 21:41
47Languages
48БеларускаяБеларуская (тарашкевіца)БългарскиCat...
49This page was last edited on 25 February 2023,...
50Privacy policy\\nAbout Wikipedia\\nDisclaimers\\n...
\n", + "
" + ], + "text/plain": [ + " content\n", + "0 Bombard (weapon) - Wikipedia\n", + "1 Open main menu\n", + "2 Home\n", + "3 Random\n", + "4 Nearby\n", + "5 Log in\n", + "6 Settings\n", + "7 Donate\n", + "8 About Wikipedia\n", + "9 Disclaimers\n", + "10 Search\n", + "11 Bombard (weapon)\n", + "12 Article\\nTalk\n", + "13 Language\n", + "14 Watch\n", + "15 Edit\n", + "16 The bombard is a type of cannon or mortar whic...\n", + "17 Contents\\n1 Terminology\\n2 History\\n3 Notable ...\n", + "18 TerminologyEdit\\nThe term \"bombard\" was first ...\n", + "19 GalleryEdit\n", + "20 \"Hand bombard\", 1390–1400\n", + "21 200 kg wrought iron bombard, circa 1450, Metz,...\n", + "22 The Dardanelles Gun.\n", + "23 Mons Meg at Edinburgh Castle, mid-15th century\n", + "24 Mons Meg cannonballs\n", + "25 English Bombards abandoned during the Hundred ...\n", + "26 Bombard from the beginning of the 15th century...\n", + "27 Early Ming bombard with two pair of trunnions,...\n", + "28 Acehnese guns including two bombards (closer t...\n", + "29 15th century bombard mounted on carriage, Warsaw.\n", + "30 Bombard in its siege position, Denmark.\n", + "31 A bombard recovered from the well of Cardiff c...\n", + "32 Wikimedia Commons has media related to Bombard...\n", + "33 See alsoEdit\\nList of the largest cannons by c...\n", + "34 ^ a b c Sands, Kathleen (1999). \"Though One Of...\n", + "35 ^ Andrade 2016, p. 83.\n", + "36 ^ DeVries 2012, p. 155.\n", + "37 ^ Gwei-Djen, Lu; Needham, Joseph; Chi-Hsing, P...\n", + "38 ^ Aung-Thwin, Michael; Hall, Kenneth (2011). \"...\n", + "39 ^ File:Westgate 076.jpg\n", + "40 ^ \"Reconstruction of Norham Castle Seige by Sc...\n", + "41 ^ W. H. Finlayson. The Scottish Historical Rev...\n", + "42 ^ \"Bodiam Bombard | Kent and Sussex Courier\". ...\n", + "43 ^ Schmidtchen (1977b), pp. 226–228\n", + "44 ReferencesEdit\\n  This article incorporates te...\n", + "45 Retrieved from \"https://en.wikipedia.org/w/ind...\n", + "46 Last edited on 25 February 2023, at 21:41\n", + "47 Languages\n", + "48 БеларускаяБеларуская (тарашкевіца)БългарскиCat...\n", + "49 This page was last edited on 25 February 2023,...\n", + "50 Privacy policy\\nAbout Wikipedia\\nDisclaimers\\n..." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "template = '''\n", + "Test prompt template\n", + "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\" \n", + "\n", + "Context: \n", + "{sections}\n", + "\n", + "Question: {question}\n", + "Answer:\n", + "'''" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computing embeddings...\n", + "model_lang en\n", + "Selected 13 document sections:\n", + "49\n", + "43\n", + "36\n", + "45\n", + "35\n", + "42\n", + "22\n", + "17\n", + "38\n", + "39\n", + "12\n", + "20\n", + "8\n", + "\n", + "Test prompt template\n", + "Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\" \n", + "\n", + "Context: \n", + "\n", + "* This page was last edited on 25 February 2023, at 21:41 (UTC). Content is available under CC BY-SA 3.0 unless otherwise noted.\n", + "* ^ Schmidtchen (1977b), pp. 226–228\n", + "* ^ DeVries 2012, p. 155.\n", + "* Retrieved from \"https://en.wikipedia.org/w/index.php?title=Bombard_(weapon)&oldid=1141599306\"\n", + "* ^ Andrade 2016, p. 83.\n", + "* ^ \"Bodiam Bombard | Kent and Sussex Courier\". Archived from the original on 2015-09-23. Retrieved 2015-04-13.\n", + "* The Dardanelles Gun.\n", + "* Contents 1 Terminology 2 History 3 Notable examples 4 Gallery 5 See also 6 Notes 7 References 8 Further reading\n", + "* ^ Aung-Thwin, Michael; Hall, Kenneth (2011). \"New Perspectives on the History and Historiography of Southeast Asia\": 85. {{cite journal}}: Cite journal requires |journal= (help)\n", + "* ^ File:Westgate 076.jpg\n", + "* Article Talk\n", + "* \"Hand bombard\", 1390–1400\n", + "* About Wikipedia\n", + "\n", + "Question: What is the title of this PDF?\n", + "Answer:\n", + "\n", + "all_done!\n" + ] + } + ], + "source": [ + "basic_extractor = BaseExtractor(dataframe=df, embedding_extractor=\"hf\", model_lang=\"en\", is_turbo=True, prompt_template=template)\n", + "answer, prompt, messages = basic_extractor.extract(\"What is the title of this PDF?\", max_tokens=300)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"I don't know.\"" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "What is Bombard?\n", + "all_done!\n" + ] + } + ], + "source": [ + "answer, prompt, messages = basic_extractor.extract(\"What is Bombard?\", max_tokens=300)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Bombard is a weapon.'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'role': 'system', 'content': 'you are a helpful assistant'},\n", + " {'role': 'user',\n", + " 'content': '\\nTest prompt template\\nAnswer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don\\'t know.\" \\n\\nContext: \\n\\n* This page was last edited on 25 February 2023, at 21:41\\xa0(UTC). Content is available under CC BY-SA 3.0 unless otherwise noted.\\n* ^ Schmidtchen (1977b), pp. 226–228\\n* ^ DeVries 2012, p.\\xa0155.\\n* Retrieved from \"https://en.wikipedia.org/w/index.php?title=Bombard_(weapon)&oldid=1141599306\"\\n* ^ Andrade 2016, p.\\xa083.\\n* ^ \"Bodiam Bombard | Kent and Sussex Courier\". Archived from the original on 2015-09-23. Retrieved 2015-04-13.\\n* The Dardanelles Gun.\\n* Contents 1 Terminology 2 History 3 Notable examples 4 Gallery 5 See also 6 Notes 7 References 8 Further reading\\n* ^ Aung-Thwin, Michael; Hall, Kenneth (2011). \"New Perspectives on the History and Historiography of Southeast Asia\": 85. {{cite journal}}: Cite journal requires |journal= (help)\\n* ^ File:Westgate 076.jpg\\n* Article Talk\\n* \"Hand bombard\", 1390–1400\\n* About Wikipedia\\n\\nQuestion: What is the title of this PDF?\\nAnswer:\\n'},\n", + " {'role': 'assistant', 'content': \"I don't know.\"},\n", + " {'role': 'user', 'content': 'What is Bombard?'},\n", + " {'role': 'assistant', 'content': 'Bombard is a weapon.'}]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Give me more details\n", + "all_done!\n" + ] + } + ], + "source": [ + "answer, prompt, messages = basic_extractor.extract(\"Give me more details\", max_tokens=300)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'A bombard is a large caliber, muzzle-loading artillery piece primarily used during the early modern period of Europe. This weapon was used to hurl heavy stone balls onto enemy fortifications. The bombard was first developed in the early 14th century and was used until the 17th century. It was a predecessor to the modern cannon.'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "answer" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "knowledgegpt-env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/knowledgegpt/extractors/base_extractor.py b/knowledgegpt/extractors/base_extractor.py index cbf3bdc..9b005df 100644 --- a/knowledgegpt/extractors/base_extractor.py +++ b/knowledgegpt/extractors/base_extractor.py @@ -5,7 +5,7 @@ class BaseExtractor: def __init__(self, dataframe=None, embedding_extractor="hf", model_lang="en", is_turbo=False, index_type="basic", - verbose=False, index_path=None): + verbose=False, index_path=None, prompt_template=None): """ :param dataframe: if you have own df use it else choose correct extractor :param embedding_extractor: default hf, openai @@ -28,6 +28,7 @@ def __init__(self, dataframe=None, embedding_extractor="hf", model_lang="en", is self.is_turbo = is_turbo self.index_type = index_type self.verbose = verbose + self.prompt_template = prompt_template self.messages = [] self.embeddings = None @@ -102,7 +103,8 @@ def extract(self, query, max_tokens, load_index=False) -> tuple[str, str, list]: verbose=self.verbose, messages=self.messages, max_tokens=max_tokens, - index_type=self.index_type + index_type=self.index_type, + prompt_template=self.prompt_template ) if not self.verbose: print("all_done!") diff --git a/knowledgegpt/utils/utils_completion.py b/knowledgegpt/utils/utils_completion.py index be749bf..01e5d97 100644 --- a/knowledgegpt/utils/utils_completion.py +++ b/knowledgegpt/utils/utils_completion.py @@ -28,7 +28,8 @@ def answer_query_with_context( is_turbo: str = False, messages: list = None, index_type: str = "basic", - max_tokens=1000 + max_tokens=1000, + prompt_template=None ) -> str: """ Answer a query using the provided context. @@ -55,7 +56,8 @@ def answer_query_with_context( embedding_type=embedding_type, model_lang=model_lang, max_tokens=max_tokens, - index_type=index_type + index_type=index_type, + prompt_template=prompt_template ) if is_turbo: messages.append({"role": "user", "content": prompt}) diff --git a/knowledgegpt/utils/utils_prompt.py b/knowledgegpt/utils/utils_prompt.py index 625749e..b0cf585 100644 --- a/knowledgegpt/utils/utils_prompt.py +++ b/knowledgegpt/utils/utils_prompt.py @@ -12,7 +12,7 @@ def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame, embedding_type: str = "hf", - verbose=False, model_lang: str = "en", max_tokens=1000, index_type="basic") -> str: + verbose=False, model_lang: str = "en", max_tokens=1000, index_type="basic", prompt_template=None) -> str: """ Construct the prompt to be used for completion. :param question: The question to answer. @@ -51,9 +51,13 @@ def construct_prompt(question: str, context_embeddings: dict, df: pd.DataFrame, if not verbose: print(f"Selected {len(chosen_sections)} document sections:") print("\n".join(chosen_sections_indexes)) - if model_lang == "tr": - header = """Cümleyi doğru bir şekilde cevaplayın ve cevap metin içinde yoksa "bilmiyorum" diyin.\n\nMetin:\n""" - else: - header = """Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "I don't know."\n\nContext:\n""" - return header + "".join(chosen_sections) + "\n\n Q: " + question + "\n A:" + if prompt_template is None: + if model_lang == "tr": + header = """Cümleyi doğru bir şekilde cevaplayın ve cevap metin içinde yoksa "bilmiyorum" diyin.\n\nMetin:\n""" + else: + header = """Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say "I don't know."\n\nContext:\n""" + + return header + "".join(chosen_sections) + "\n\n Q: " + question + "\n A:" + else: + return prompt_template.format(question=question, sections="".join(chosen_sections))