updating model tests #698

Merged
merged 1 commit on May 7, 2024
updating model tests
DARREN OBERST authored and DARREN OBERST committed May 7, 2024
commit 03f54444afb14fe776f56220ec7c8a06c0b26cb0
206 changes: 0 additions & 206 deletions tests/experimental/inference_server.py

This file was deleted.

53 changes: 0 additions & 53 deletions tests/models/test_all_generative_models.py

This file was deleted.

15 changes: 6 additions & 9 deletions tests/models/test_cloud_model_providers.py
@@ -1,3 +1,5 @@
""" Basic connectivity tests to cloud API providers. """

import os
from llmware.prompts import Prompt

@@ -7,39 +9,34 @@
google_api_key = os.environ.get("GOOGLE_API_KEY","")
cohere_api_key = os.environ.get("COHERE_API_KEY", "")


# Simple test to make sure we are reaching OpenAI
def test_openai():
    prompter = Prompt(llm_name="gpt-4", llm_api_key=openai_api_key)
    response = prompter.completion("what is artificial intelligence?")
    llm_response = response["llm_response"]
    assert 'artificial' in llm_response.lower()


# Simple test to make sure we are reaching Google
def test_google():
    prompter = Prompt(llm_name="text-bison@001", llm_api_key=google_api_key)
    response = prompter.completion("what is artificial intelligence?")
    llm_response = response["llm_response"]
    assert 'artificial' in llm_response.lower()


# Simple test to make sure we are reaching Anthropic
def test_anthropic():
    prompter = Prompt(llm_name="claude-instant-v1", llm_api_key=anthropic_api_key)
    response = prompter.completion("what is artificial intelligence?")
    llm_response = response["llm_response"]
    assert 'artificial' in llm_response.lower()


# Simple test to make sure we are reaching AI21
def test_ai21():
    prompter = Prompt(llm_name="j2-grande-instruct", llm_api_key=ai21_api_key)
    response = prompter.completion("what is artificial intelligence?")
    llm_response = response["llm_response"]
    assert 'artificial' in llm_response.lower()

# Simple test to make sure we are reaching Cohere. Disabled due to Cohere temporarily rate-limiting summarization for Trial accounts
# def test_cohere():
#     user_managed_secrets_setup()
#     prompter = Prompt(llm_name="summarize-medium", llm_api_key=os.environ["USER_MANAGED_COHERE_API_KEY"])
#     response = prompter.completion("what is artificial intelligence?")
#     llm_response = response["llm_response"]
#     print(llm_response)
#     assert 'artificial' in llm_response.lower()
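
Note: the four provider tests above share an identical pattern, so a possible follow-up (not part of this commit) would be a single parametrized test. A minimal sketch, using the same Prompt API as above; the OPENAI_API_KEY, ANTHROPIC_API_KEY, and AI21_API_KEY environment-variable names are assumed to follow the same convention as GOOGLE_API_KEY shown earlier:

import os

import pytest

from llmware.prompts import Prompt

# (model name, environment variable assumed to hold the provider API key)
CLOUD_MODELS = [
    ("gpt-4", "OPENAI_API_KEY"),
    ("claude-instant-v1", "ANTHROPIC_API_KEY"),
    ("text-bison@001", "GOOGLE_API_KEY"),
    ("j2-grande-instruct", "AI21_API_KEY"),
]


@pytest.mark.parametrize("llm_name, key_env_var", CLOUD_MODELS)
def test_cloud_provider(llm_name, key_env_var):
    # same basic connectivity check as the individual tests above
    prompter = Prompt(llm_name=llm_name, llm_api_key=os.environ.get(key_env_var, ""))
    response = prompter.completion("what is artificial intelligence?")
    assert "artificial" in response["llm_response"].lower()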
49 changes: 49 additions & 0 deletions tests/models/test_gguf_model_load.py
@@ -0,0 +1,49 @@

""" Test that GGUF models are loading correctly in local environment. By default, will run through a series of
different GGUF models in the ModelCatalog to spot-check that the model is correctly loading and
successfully completing an inference:

# tests several different underlying models:

# bling-answer-tool -> tiny-llama (1b)
# bling-phi-3-gguf -> phi-3 (3.8b)
# dragon-yi-answer-tool -> yi (6b)
# dragon-llama-answer-tool -> llama-2 (7b)
# llama-2-7b-chat-gguf -> llama-2-chat (7b)
# dragon-mistral-answer-tool -> mistral-1 (7b)

"""


from llmware.models import ModelCatalog


def test_gguf_model_load():

    # feel free to adapt this model list

    model_list = ["bling-answer-tool",
                  "bling-phi-3-gguf",
                  "dragon-yi-answer-tool",
                  "dragon-llama-answer-tool",
                  "llama-2-7b-chat-gguf",
                  "dragon-mistral-answer-tool"]

    # please note that the unusually short and simple prompt at times actually yields more variability in the model
    # response - we are only testing for successful loading and inference

    sample_prompt = ("The company stock declined by $12 after poor earnings results."
                     "\nHow much did the stock price decline?")

    for model_name in model_list:

        print("\nmodel name: ", model_name)

        model = ModelCatalog().load_model(model_name, temperature=0.0, sample=False)

        response = model.inference(sample_prompt)

        print(f"{model_name} - response: ", response)

        assert response is not None

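For local spot-checking, the new test can also be run as a plain script (a small sketch, not part of this commit); note that each GGUF model is pulled down locally on first use, so the first run can take a while:

if __name__ == "__main__":
    # run the spot-check over the model list above directly, outside of pytest
    test_gguf_model_load()

Running `pytest -s tests/models/test_gguf_model_load.py` gives the same coverage with the print output visible.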