Skip to content

Commit

Permalink
Merge pull request llmware-ai#707 from llmware-ai/hf-datasets-and-tab…
Browse files Browse the repository at this point in the history
…ulate-optional-imports

updating hf datasets as optional config examples tests
  • Loading branch information
doberst authored May 8, 2024
2 parents dbf89df + 0d277a0 commit 7ab651e
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 18 deletions.
23 changes: 20 additions & 3 deletions examples/Models/dragon_gguf_fast_start.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@

"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop"""
"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop.
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
datasets library, which can be installed with:
`pip3 install datasets`
"""


import time
from llmware.prompts import Prompt

from datasets import load_dataset
from llmware.exceptions import LLMWareException
from importlib import util
if not util.find_spec("datasets"):
raise LLMWareException(message="\nto run this example, you need to install HuggingFace datasets: "
"`pip3 install datasets`")

# `datasets` is an optional dependency that llmware does not install automatically, so import it
# defensively and surface an actionable llmware error when it is unavailable.
try:
    from datasets import load_dataset
except ImportError as import_err:
    # Catch ImportError only: a bare `except:` would also intercept KeyboardInterrupt / SystemExit
    # and would misreport any unrelated failure as "datasets not found".
    raise LLMWareException(message="Exception: datasets not found and required for example.") from import_err


# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo
Expand Down
10 changes: 9 additions & 1 deletion examples/Models/dragon_rag_benchmark_tests_huggingface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@

""" This example demonstrates running a benchmarks set of tests against llmware DRAGON models
https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
datasets library, which can be installed with:
`pip3 install datasets`
"""

import time
Expand All @@ -11,7 +18,8 @@
try:
from datasets import load_dataset
except ImportError:
raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
raise ImportError ("This example requires the 'datasets' Python package. "
"You can install it with 'pip3 install datasets'")


# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo
Expand Down
11 changes: 10 additions & 1 deletion examples/Models/dragon_rag_benchmark_tests_llmware.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,24 @@
https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
The model loading and interaction is handled with the llmware Prompt class which provides additional
capabilities like evidence checking
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
datasets library, which can be installed with:
`pip3 install datasets`
"""

import time
from llmware.prompts import Prompt

# The datasets package is not installed automatically by llmware
try:
from datasets import load_dataset
except ImportError:
raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
raise ImportError ("This example requires the 'datasets' Python package. "
"You can install it with 'pip3 install datasets'")


# Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo
Expand Down
28 changes: 24 additions & 4 deletions examples/Models/llmware_model_fast_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,29 @@
Usage: You can pass in a model name:
python llmware_model_fast_start.py llmware/bling-1b-0.1
If you do not specify a model you will be prompted to pick one
This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
datasets library, which can be installed with:
`pip3 install datasets`
"""

import re
import sys
import time
import torch
from datasets import load_dataset
from huggingface_hub import hf_api, ModelFilter, ModelCard
from tabulate import tabulate
from transformers import AutoModelForCausalLM, AutoTokenizer

# The datasets package is not installed automatically by llmware
try:
from datasets import load_dataset
except ImportError:
raise ImportError ("This example requires the 'datasets' Python package. "
"You can install it with 'pip3 install datasets'")


# Query HuggingFace and get the llmware models. Return the components of a table: headers and data
def get_llmware_models():
Expand All @@ -38,13 +50,21 @@ def get_llmware_models():


def print_llmware_models():

table_headers, table_data = get_llmware_models()
print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))

print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
for row in table_data:
print(row[0], "\t\t", row[1], "\t\t", row[2])


def prompt_user_for_model_selection(prompt=None):

table_headers, table_data = get_llmware_models()
print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))

print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
for row in table_data:
print(row[0], "\t\t", row[1], "\t\t", row[2])

num_models = len(table_data)

Expand Down
8 changes: 3 additions & 5 deletions llmware/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
boto3==1.24.53
datasets==2.15.0
huggingface-hub==0.19.4
numpy>=1.23.2
openai>=1.0
pymongo>=4.7.0
tabulate==0.9.0
tokenizers>=0.15.0
torch>=1.13.1
transformers>=4.36.0
word2number==1.1
Wikipedia-API==0.6.0
psycopg-binary==3.1.17
psycopg==3.1.17
pgvector==0.2.4
colorama==0.4.6
einops==0.7.0
librosa>=0.10.0
word2number==1.1

tokenizers>=0.15.0
huggingface-hub==0.19.4
requests>=2.31.0
tqdm~=4.66.1
botocore~=1.27.96
Expand Down
2 changes: 0 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,10 @@ def glob_fix(package_name, glob):
zip_safe=True,
install_requires=[
'boto3==1.24.53',
'datasets==2.15.0',
'huggingface-hub==0.19.4',
'numpy>=1.23.2',
'openai>=1.0.0',
'pymongo>=4.7.0',
'tabulate==0.9.0',
'tokenizers>=0.15.0',
'torch>=1.13.1',
'transformers>=4.36.0',
Expand Down
18 changes: 16 additions & 2 deletions tests/models/test_prompt_benchmark_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,27 @@

"""This runs a benchmark test dataset against a series of prompts. It can be used to test any model type for
longer running series of prompts, as well as the fact-checking capability. """
longer running series of prompts, as well as the fact-checking capability.
This test uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
Huggingface at: www.huggingface.co/llmware/rag_instruct_benchmark_tester, or by using the
datasets library, which can be installed with:
`pip3 install datasets`
"""


import time
import random

from llmware.prompts import Prompt
from datasets import load_dataset

# The datasets package is not installed automatically by llmware
try:
from datasets import load_dataset
except ImportError:
raise ImportError ("This test requires the 'datasets' Python package. "
"You can install it with 'pip3 install datasets'")



def load_rag_benchmark_tester_dataset():
Expand Down

0 comments on commit 7ab651e

Please sign in to comment.