Merge pull request #125 from NVJCameron/jcameron/update-nv-endpoints
update nv endpoints
juberti authored Oct 7, 2024
2 parents 0a513a5 + a9b57f5 commit 6bf7237
Showing 1 changed file with 4 additions and 9 deletions.
llm_benchmark_suite.py (13 changes: 4 additions & 9 deletions)
@@ -350,7 +350,7 @@ def _text_models():
             "accounts/fireworks/models/mixtral-8x7b-instruct-hf", MIXTRAL_8X7B_INSTRUCT
         ),
         _GroqLlm("mixtral-8x7b-32768", MIXTRAL_8X7B_INSTRUCT_FP8),
-        _NvidiaLlm("mistralai/mixtral-8x7b-instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
+        _NvidiaLlm("mistralai/mixtral-8x7b-instruct-v0.1-turbo", MIXTRAL_8X7B_INSTRUCT_FP8),
         _TogetherLlm("mistralai/Mixtral-8x7B-Instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
         # Llama 3.1 405b
         _DatabricksLlm("databricks-meta-llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
@@ -361,7 +361,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-405b-instruct-turbo", LLAMA_31_405B_CHAT_FP8),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", LLAMA_31_405B_CHAT_FP8
         ),
@@ -375,7 +375,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-70b-instruct", LLAMA_31_70B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-70b-instruct-turbo", LLAMA_31_70B_CHAT_FP8),
         _PerplexityLlm("llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", LLAMA_31_70B_CHAT_FP8
@@ -390,7 +390,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-8b-instruct", LLAMA_31_8B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-8b-instruct-turbo", LLAMA_31_8B_CHAT_FP8),
         _PerplexityLlm("llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", LLAMA_31_8B_CHAT_FP8
@@ -406,7 +406,6 @@ def _text_models():
             "accounts/fireworks/models/llama-v3-70b-instruct-hf", LLAMA_3_70B_CHAT
         ),
         _GroqLlm("llama3-70b-8192", LLAMA_3_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama3-70b-instruct", LLAMA_3_70B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-70b-chat-hf", LLAMA_3_70B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3-70B-Instruct-Turbo", LLAMA_3_70B_CHAT_FP8
@@ -428,7 +427,6 @@ def _text_models():
             "accounts/fireworks/models/llama-v3-8b-instruct-hf", LLAMA_3_8B_CHAT
         ),
         _GroqLlm("llama3-8b-8192", LLAMA_3_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama3-8b-instruct", LLAMA_3_8B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-8b-chat-hf", LLAMA_3_8B_CHAT),
         _TogetherLlm("meta-llama/Meta-Llama-3-8B-Instruct-Turbo", LLAMA_3_8B_CHAT_FP8),
         _TogetherLlm("meta-llama/Meta-Llama-3-8B-Instruct-Lite", LLAMA_3_8B_CHAT_FP4),
@@ -460,11 +458,8 @@ def _tools_models():
         # "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         # ), returns "FUNCTION" and the call as text
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
         _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
         _GroqLlm("llama3-groq-70b-8192-tool-use-preview"),
         _GroqLlm("llama3-groq-8b-8192-tool-use-preview"),
     ]
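Note: the diff only touches the model catalog entries, not the _NvidiaLlm class itself. Below is a minimal sketch of what one of these entries presumably resolves to, assuming the suite's NVIDIA provider wraps the OpenAI-compatible API served at https://integrate.api.nvidia.com/v1; the class body, the LLAMA_31_70B_CHAT_FP8 label constant, and the chat helper are illustrative assumptions, not code taken from this commit.

# Minimal sketch, not the repo's actual _NvidiaLlm implementation.
# Assumptions: NVIDIA's hosted models are reachable through the OpenAI-compatible
# endpoint at https://integrate.api.nvidia.com/v1, authenticated via NVIDIA_API_KEY,
# and each catalog entry pairs an API model id with a benchmark display label.
import os

from openai import OpenAI

LLAMA_31_70B_CHAT_FP8 = "llama-3.1-70b-chat-fp8"  # hypothetical label constant


class _NvidiaLlm:
    def __init__(self, model: str, display_name: str | None = None):
        self.model = model
        self.display_name = display_name or model
        # NVIDIA's hosted inference is OpenAI-compatible, so the stock client works.
        self._client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=os.environ["NVIDIA_API_KEY"],
        )

    def chat(self, prompt: str) -> str:
        # Single non-streaming request; the real benchmark presumably streams
        # tokens to measure time-to-first-token and throughput.
        resp = self._client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        return resp.choices[0].message.content or ""


# Example use with one of the entries updated in this commit:
# _NvidiaLlm("meta/llama-3.1-70b-instruct-turbo", LLAMA_31_70B_CHAT_FP8).chat("Hi")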
