Skip to content

Commit

Permalink
Merge branch 'master' into image-generation-agents
Browse files Browse the repository at this point in the history
  • Loading branch information
kaanozbudak authored Apr 15, 2023
2 parents dffa8a8 + 87ade2d commit 7243eaa
Show file tree
Hide file tree
Showing 4 changed files with 406 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ examples/calculated_indexes/*
static_files/*_test.*
examples/example_config.py
output_images/
.chroma
384 changes: 384 additions & 0 deletions examples/chroma_example.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,384 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/eren/opt/anaconda3/envs/knowledgegpt-env/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"/Users/eren/opt/anaconda3/envs/knowledgegpt-env/lib/python3.9/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\n",
" warn(\"Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work\", RuntimeWarning)\n"
]
}
],
"source": [
"from knowledgegpt.extractors.base_extractor import BaseExtractor\n",
"from knowledgegpt.utils.utils_scrape import scrape_content"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"from example_config import SECRET_KEY\n",
"openai.api_key = SECRET_KEY"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = scrape_content(\"https://en.wikipedia.org/wiki/Bombard_(weapon)\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"basic_extractor_chroma = BaseExtractor(df, embedding_extractor=\"hf\", model_lang=\"en\", is_turbo=True, index_type=\"chroma\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Computing embeddings...\n",
"model_lang en\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"DONE, CHROMA\n",
"Selected 3 document sections:\n",
"0\n",
"11\n",
"32\n",
"Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n",
"Context:\n",
"\n",
"* Bombard (weapon) - Wikipedia\n",
"* Bombard (weapon)\n",
"* Wikimedia Commons has media related to Bombards (weapon).\n",
"\n",
" Q: What is a bombard? Where were they used?\n",
" A:\n",
"all_done!\n"
]
}
],
"source": [
"answer, prompt, messages =basic_extractor_chroma.extract(\"What is a bombard? Where were they used?\", max_tokens=200)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'A bombard is a type of weapon. It is a large caliber, muzzle-loading artillery piece used in the Middle Ages and the early modern period. They were used in various parts of the world, including Europe and Asia.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answer"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"What is a bombard? Where were they used? What is the stronger aspects\n",
"all_done!\n"
]
}
],
"source": [
"answer, prompt, messages =basic_extractor_chroma.extract(\"What is a bombard? Where were they used? What is the stronger aspects\", max_tokens=400,)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'A bombard is a type of weapon, a large caliber, muzzle-loading artillery piece used in the Middle Ages and the early modern period. They were used in various parts of the world, including Europe and Asia. \\n\\nIn terms of stronger aspects, bombards were known for their ability to fire large projectiles over long distances, making them effective siege weapons. They were also capable of causing significant damage to fortifications and other structures. However, they were heavy and difficult to move, and their rate of fire was relatively slow compared to other types of artillery.'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answer"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"model_lang en\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"DONE, CHROMA\n",
"Selected 4 document sections:\n",
"0\n",
"11\n",
"32\n",
"16\n",
"Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n",
"Context:\n",
"\n",
"* Bombard (weapon) - Wikipedia\n",
"* Bombard (weapon)\n",
"* Wikimedia Commons has media related to Bombards (weapon).\n",
"* The bombard is a type of cannon or mortar which was used throughout the Middle Ages and the early modern period. Bombards were mainly large calibre, muzzle-loading artillery pieces used during sieges to shoot round stone projectiles at the walls of enemy fortifications, enabling troops to break in. Most bombards were made of iron and used gunpowder to launch the projectiles.[1] There are many examples of bombards, including Mons Meg, the Dardanelles Gun, and the handheld bombard. Bombard mortar and granite ball projectile of the Knights of Saint John of Jerusalem, Rhodes, 1480–1500. Founded at the request of Pierre d'Aubusson, the bombard was used for close defense of the walls (100–200 meters) at the Siege of Rhodes. It fired 260 kg granite balls. The bombard weighs about 3,325 kg. Musée de l'Armée.The weapon provided the name to the Royal Artillery rank of bombardier and the word bombardment.\n",
"\n",
" Q: What is a bombard? Where were they used? What is the stronger aspects\n",
" A:\n",
"all_done!\n"
]
}
],
"source": [
"answer, prompt, messages =basic_extractor_chroma.extract(\"What is a bombard? Where were they used? What is the stronger aspects\", max_tokens=400, context_restarter=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"model_lang en\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"DONE, CHROMA\n",
"Selected 10 document sections:\n",
"11\n",
"0\n",
"45\n",
"30\n",
"37\n",
"42\n",
"27\n",
"32\n",
"20\n",
"26\n",
"Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n",
"Context:\n",
"\n",
"* Bombard (weapon)\n",
"* Bombard (weapon) - Wikipedia\n",
"* Retrieved from \"https://en.wikipedia.org/w/index.php?title=Bombard_(weapon)&oldid=1141599306\"\n",
"* Bombard in its siege position, Denmark.\n",
"* ^ Gwei-Djen, Lu; Needham, Joseph; Chi-Hsing, Phan (July 1988). \"The Oldest Representation of a Bombard\". Technology and Culture. 29 (3): 594–605. doi:10.2307/3105275. JSTOR 3105275.\n",
"* ^ \"Bodiam Bombard | Kent and Sussex Courier\". Archived from the original on 2015-09-23. Retrieved 2015-04-13.\n",
"* Early Ming bombard with two pair of trunnions, 1377 AD.\n",
"* Wikimedia Commons has media related to Bombards (weapon).\n",
"* \"Hand bombard\", 1390–1400\n",
"* Bombard from the beginning of the 15th century, the only surviving bombard used by Teutonic Knights, now exposed in Kwidzyn Castle.\n",
"\n",
" Q: What is the first release date for Bombard?\n",
" A:\n",
"all_done!\n"
]
}
],
"source": [
"answer, prompt, messages =basic_extractor_chroma.extract(\"What is the first release date for Bombard?\", max_tokens=400, context_restarter=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"I don't know. The provided context does not contain information about the release date of Bombard.\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answer"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"model_lang en\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using embedded DuckDB without persistence: data will be transient\n",
"No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"DONE, CHROMA\n",
"Selected 3 document sections:\n",
"0\n",
"11\n",
"37\n",
"Answer the question as truthfully as possible using the provided context, and if the answer is not contained within the text below, say \"I don't know.\"\n",
"\n",
"Context:\n",
"\n",
"* Bombard (weapon) - Wikipedia\n",
"* Bombard (weapon)\n",
"* ^ Gwei-Djen, Lu; Needham, Joseph; Chi-Hsing, Phan (July 1988). \"The Oldest Representation of a Bombard\". Technology and Culture. 29 (3): 594–605. doi:10.2307/3105275. JSTOR 3105275.\n",
"\n",
" Q: What is the time period the Bombard was used most actively?\n",
" A:\n",
"all_done!\n"
]
}
],
"source": [
"answer, prompt, messages =basic_extractor_chroma.extract(\"What is the time period the Bombard was used most actively?\", max_tokens=800, context_restarter=True)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The Bombard was used most actively throughout the Middle Ages and the early modern period.'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"answer"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "knowledgegpt-env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "d0285ce201951a37668925b1b7de032ac1583adb61d048d8a5dd45351727e09e"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 1 addition & 1 deletion knowledgegpt/extractors/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def check_model_lang(model_lang, model_lang_acceptable_list=None):

def check_index_type(index_type, index_type_acceptable_list=None):
if index_type_acceptable_list is None:
index_type_acceptable_list = ["basic", "faiss"]
index_type_acceptable_list = ["basic", "faiss", "chroma"]

if not isinstance(index_type, str):
raise Exception("Index Type must be a string")
Expand Down
Loading

0 comments on commit 7243eaa

Please sign in to comment.