Merge pull request llmware-ai#707 from llmware-ai/hf-datasets-and-tabulate-optional-imports

updating hf datasets as optional config examples tests
Aditijainnn · May 8, 2024 · 7ab651e · 7ab651e
2 parents dbf89df + 0d277a0
commit 7ab651e
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 18 deletions.
diff --git a/examples/Models/dragon_gguf_fast_start.py b/examples/Models/dragon_gguf_fast_start.py
@@ -1,10 +1,27 @@
 
-"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop"""
+"""This example demonstrates running a 7B RAG-instruct fine-tuned DRAGON model locally on a laptop.
+
+    This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
+    Huggingface at: https://huggingface.co/datasets/llmware/rag_instruct_benchmark_tester, or by using the
+    datasets library, which can be installed with:
+
+     `pip3 install datasets`
+
+"""
+
 
 import time
 from llmware.prompts import Prompt
-
-from datasets import load_dataset
+from llmware.exceptions import LLMWareException
+from importlib import util
+if not util.find_spec("datasets"):
+    raise LLMWareException(message="\nto run this example, you need to install HuggingFace datasets:  "
+                                    "`pip3 install datasets`")
+# find_spec only confirms the package can be located - the import itself can still fail, so guard it too
+try:
+    from datasets import load_dataset
+except ImportError:
+    raise LLMWareException(message="Exception: datasets not found and required for example.")
 
 
 # Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

diff --git a/examples/Models/dragon_rag_benchmark_tests_huggingface.py b/examples/Models/dragon_rag_benchmark_tests_huggingface.py
@@ -1,6 +1,13 @@
 
 """ This example demonstrates running a benchmarks set of tests against llmware DRAGON models
     https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
+
+    This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
+    Huggingface at: https://huggingface.co/datasets/llmware/rag_instruct_benchmark_tester, or by using the
+    datasets library, which can be installed with:
+
+     `pip3 install datasets`
+
 """
 
 import time
@@ -11,7 +18,8 @@
 try:
     from datasets import load_dataset
 except ImportError:
-    raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
+    raise ImportError ("This example requires the 'datasets' Python package. "
+                       "You can install it with 'pip3 install datasets'")
 
 
 # Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

diff --git a/examples/Models/dragon_rag_benchmark_tests_llmware.py b/examples/Models/dragon_rag_benchmark_tests_llmware.py
@@ -3,15 +3,24 @@
     https://huggingface.co/collections/llmware/dragon-models-65552d7648093c3f6e35d1bf
     The model loading and interaction is handled with the llmware Prompt class which provides additional
     capabilities like evidence checking
+
+    This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
+    Huggingface at: https://huggingface.co/datasets/llmware/rag_instruct_benchmark_tester, or by using the
+    datasets library, which can be installed with:
+
+     `pip3 install datasets`
+
 """
 
 import time
 from llmware.prompts import Prompt
+
 # The datasets package is not installed automatically by llmware
 try:
     from datasets import load_dataset
 except ImportError:
-    raise ImportError ("This example requires the 'datasets' Python package. You can install it with 'pip install datasets'")
+    raise ImportError ("This example requires the 'datasets' Python package. "
+                       "You can install it with 'pip3 install datasets'")
 
 
 # Pull a 200 question RAG benchmark test dataset from llmware HuggingFace repo

diff --git a/examples/Models/llmware_model_fast_start.py b/examples/Models/llmware_model_fast_start.py
@@ -5,17 +5,29 @@
     Usage: You can pass in a model name:
         python llmware_model_fast_start.py llmware/bling-1b-0.1
     If you do not specify a model you will be prompted to pick one
+
+    This example uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
+    Huggingface at: https://huggingface.co/datasets/llmware/rag_instruct_benchmark_tester, or by using the
+    datasets library, which can be installed with:
+
+     `pip3 install datasets`
+
 """
 
 import re
 import sys
 import time
 import torch
-from datasets import load_dataset
 from huggingface_hub import hf_api, ModelFilter, ModelCard
-from tabulate import tabulate
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# The datasets package is not installed automatically by llmware
+try:
+    from datasets import load_dataset
+except ImportError:
+    raise ImportError ("This example requires the 'datasets' Python package. "
+                       "You can install it with 'pip3 install datasets'")
+
 
 # Query HuggingFace and get the llmware models.  Return the the components of a table: headers and data
 def get_llmware_models():
@@ -38,13 +50,21 @@ def get_llmware_models():
 
 
 def print_llmware_models():
+
     table_headers, table_data = get_llmware_models()
-    print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))
+
+    print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
+    for row in table_data:
+        print(row[0], "\t\t", row[1], "\t\t", row[2])
 
 
 def prompt_user_for_model_selection(prompt=None):
+
     table_headers, table_data = get_llmware_models()
-    print(tabulate(table_data, headers=table_headers, tablefmt="plain", numalign="right"))
+
+    print(table_headers[0], "\t\t", table_headers[1], "\t\t", table_headers[2])
+    for row in table_data:
+        print(row[0], "\t\t", row[1], "\t\t", row[2])
 
     num_models = len(table_data)
 

diff --git a/llmware/requirements.txt b/llmware/requirements.txt
@@ -1,22 +1,20 @@
 boto3==1.24.53  
-datasets==2.15.0   
-huggingface-hub==0.19.4  
 numpy>=1.23.2
 openai>=1.0
 pymongo>=4.7.0
-tabulate==0.9.0
-tokenizers>=0.15.0
 torch>=1.13.1
 transformers>=4.36.0
-word2number==1.1
 Wikipedia-API==0.6.0
 psycopg-binary==3.1.17
 psycopg==3.1.17
 pgvector==0.2.4  
 colorama==0.4.6
 einops==0.7.0
 librosa>=0.10.0
+word2number==1.1
 
+tokenizers>=0.15.0
+huggingface-hub==0.19.4  
 requests>=2.31.0
 tqdm~=4.66.1
 botocore~=1.27.96

diff --git a/setup.py b/setup.py
@@ -54,12 +54,10 @@ def glob_fix(package_name, glob):
     zip_safe=True,
     install_requires=[
         'boto3==1.24.53',
-        'datasets==2.15.0',
         'huggingface-hub==0.19.4',
         'numpy>=1.23.2',
         'openai>=1.0.0',
         'pymongo>=4.7.0',
-        'tabulate==0.9.0',
         'tokenizers>=0.15.0',
         'torch>=1.13.1',
         'transformers>=4.36.0',

diff --git a/tests/models/test_prompt_benchmark_test.py b/tests/models/test_prompt_benchmark_test.py
@@ -1,13 +1,27 @@
 
 """This runs a benchmark test dataset against a series of prompts.  It can be used to test any model type for
-    longer running series of prompts, as well as the fact-checking capability. """
+    longer running series of prompts, as well as the fact-checking capability.
+
+    This test uses the RAG Benchmark test set, which can be pulled down from the LLMWare repository on
+    Huggingface at: https://huggingface.co/datasets/llmware/rag_instruct_benchmark_tester, or by using the
+    datasets library, which can be installed with:
+
+     `pip3 install datasets`
+ """
 
 
 import time
 import random
 
 from llmware.prompts import Prompt
-from datasets import load_dataset
+
+# The datasets package is not installed automatically by llmware
+try:
+    from datasets import load_dataset
+except ImportError:
+    raise ImportError ("This test requires the 'datasets' Python package. "
+                       "You can install it with 'pip3 install datasets'")
+
 
 
 def load_rag_benchmark_tester_dataset():