Skip to content

Commit

Permalink
Merge pull request #698 from llmware-ai/update-tests-models
Browse files Browse the repository at this point in the history
updating model tests
  • Loading branch information
doberst authored May 7, 2024
2 parents 0d4a82f + 03f5444 commit a3043e6
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 378 deletions.
206 changes: 0 additions & 206 deletions tests/experimental/inference_server.py

This file was deleted.

53 changes: 0 additions & 53 deletions tests/models/test_all_generative_models.py

This file was deleted.

15 changes: 6 additions & 9 deletions tests/models/test_cloud_model_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
""" Basic connectivity tests to cloud API providers. """

import os
from llmware.prompts import Prompt

Expand All @@ -7,39 +9,34 @@
google_api_key = os.environ.get("GOOGLE_API_KEY","")
cohere_api_key = os.environ.get("COHERE_API_KEY", "")


# Simple test to make sure we are reaching OpenAI
def test_openai():
    """Smoke-test connectivity to the OpenAI API with a one-shot completion."""
    prompt = Prompt(llm_name="gpt-4", llm_api_key=openai_api_key)
    result = prompt.completion("what is artificial intelligence?")
    answer = result["llm_response"]
    # connectivity check only: the model should echo the key term back
    assert "artificial" in answer.lower()


# Simple test to make sure we are reaching Google
def test_google():
    """Smoke-test connectivity to the Google (Vertex) API with a one-shot completion."""
    prompt = Prompt(llm_name="text-bison@001", llm_api_key=google_api_key)
    result = prompt.completion("what is artificial intelligence?")
    answer = result["llm_response"]
    # connectivity check only: the model should echo the key term back
    assert "artificial" in answer.lower()


# Simple test to make sure we are reaching Anthropic
def test_anthropic():
    """Smoke-test connectivity to the Anthropic API with a one-shot completion."""
    prompt = Prompt(llm_name="claude-instant-v1", llm_api_key=anthropic_api_key)
    result = prompt.completion("what is artificial intelligence?")
    answer = result["llm_response"]
    # connectivity check only: the model should echo the key term back
    assert "artificial" in answer.lower()


# Simple test to make sure we are reaching AI21
def test_ai21():
    """Smoke-test connectivity to the AI21 API with a one-shot completion."""
    prompt = Prompt(llm_name="j2-grande-instruct", llm_api_key=ai21_api_key)
    result = prompt.completion("what is artificial intelligence?")
    answer = result["llm_response"]
    # connectivity check only: the model should echo the key term back
    assert "artificial" in answer.lower()

# Simple test to make sure we are reaching Cohere. Disabled because Cohere is temporarily rate-limiting summarization for Trial accounts.
# def test_cohere():
# user_managed_secrets_setup()
# prompter = Prompt(llm_name="summarize-medium", llm_api_key=os.environ["USER_MANAGED_COHERE_API_KEY"])
# response = prompter.completion("what is artificial intelligence?")
# llm_response = response["llm_response"]
# print(llm_response)
# assert 'artificial' in llm_response.lower()
49 changes: 49 additions & 0 deletions tests/models/test_gguf_model_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

""" Test that GGUF models are loading correctly in local environment. By default, will run through a series of
different GGUF models in the ModelCatalog to spot-check that the model is correctly loading and
successfully completing an inference:
# tests several different underlying models:
# bling-answer-tool -> tiny-llama (1b)
# bling-phi-3-gguf -> phi-3 (3.8b)
# dragon-yi-answer-tool -> yi (6b)
# dragon-llama-answer-tool -> llama-2 (7b)
# llama-2-7b-chat-gguf -> llama-2-chat (7b)
# dragon-mistral-answer-tool -> mistral-1 (7b)
"""


from llmware.models import ModelCatalog


def test_gguf_model_load():
    """Load a set of GGUF models from the ModelCatalog and confirm each can
    complete an inference without error.

    Only successful loading and a non-None response are checked — the answer
    content itself is not validated.
    """

    # feel free to adapt this model list
    models_under_test = (
        "bling-answer-tool",
        "bling-phi-3-gguf",
        "dragon-yi-answer-tool",
        "dragon-llama-answer-tool",
        "llama-2-7b-chat-gguf",
        "dragon-mistral-answer-tool",
    )

    # please note that the unusually short and simple prompt at times actually yields more variability in the model
    # response - we are only testing for successful loading and inference
    sample_prompt = ("The company stock declined by $12 after poor earnings results."
                     "\nHow much did the stock price decline?")

    for model_name in models_under_test:

        print("\nmodel name: ", model_name)

        # deterministic settings so the run is repeatable
        loaded_model = ModelCatalog().load_model(model_name, temperature=0.0, sample=False)
        response = loaded_model.inference(sample_prompt)

        print(f"{model_name} - response: ", response)

        assert response is not None

Loading

0 comments on commit a3043e6

Please sign in to comment.