Merge pull request #125 from NVJCameron/jcameron/update-nv-endpoints
update nv endpoints
juberti authored Oct 7, 2024
2 parents 0a513a5 + a9b57f5 commit 6bf7237
Showing 1 changed file with 4 additions and 9 deletions.
llm_benchmark_suite.py (13 changes: 4 additions & 9 deletions)
@@ -350,7 +350,7 @@ def _text_models():
             "accounts/fireworks/models/mixtral-8x7b-instruct-hf", MIXTRAL_8X7B_INSTRUCT
         ),
         _GroqLlm("mixtral-8x7b-32768", MIXTRAL_8X7B_INSTRUCT_FP8),
-        _NvidiaLlm("mistralai/mixtral-8x7b-instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
+        _NvidiaLlm("mistralai/mixtral-8x7b-instruct-v0.1-turbo", MIXTRAL_8X7B_INSTRUCT_FP8),
         _TogetherLlm("mistralai/Mixtral-8x7B-Instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
         # Llama 3.1 405b
         _DatabricksLlm("databricks-meta-llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
@@ -361,7 +361,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-405b-instruct-turbo", LLAMA_31_405B_CHAT_FP8),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", LLAMA_31_405B_CHAT_FP8
         ),
@@ -375,7 +375,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-70b-instruct", LLAMA_31_70B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-70b-instruct-turbo", LLAMA_31_70B_CHAT_FP8),
         _PerplexityLlm("llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", LLAMA_31_70B_CHAT_FP8
@@ -390,7 +390,7 @@ def _text_models():
             "accounts/fireworks/models/llama-v3p1-8b-instruct", LLAMA_31_8B_CHAT_FP8
         ),
         _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
+        _NvidiaLlm("meta/llama-3.1-8b-instruct-turbo", LLAMA_31_8B_CHAT_FP8),
         _PerplexityLlm("llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", LLAMA_31_8B_CHAT_FP8
@@ -406,7 +406,6 @@ def _text_models():
             "accounts/fireworks/models/llama-v3-70b-instruct-hf", LLAMA_3_70B_CHAT
         ),
         _GroqLlm("llama3-70b-8192", LLAMA_3_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama3-70b-instruct", LLAMA_3_70B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-70b-chat-hf", LLAMA_3_70B_CHAT),
         _TogetherLlm(
             "meta-llama/Meta-Llama-3-70B-Instruct-Turbo", LLAMA_3_70B_CHAT_FP8
@@ -428,7 +427,6 @@ def _text_models():
             "accounts/fireworks/models/llama-v3-8b-instruct-hf", LLAMA_3_8B_CHAT
         ),
         _GroqLlm("llama3-8b-8192", LLAMA_3_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama3-8b-instruct", LLAMA_3_8B_CHAT),
         _TogetherLlm("meta-llama/Llama-3-8b-chat-hf", LLAMA_3_8B_CHAT),
         _TogetherLlm("meta-llama/Meta-Llama-3-8B-Instruct-Turbo", LLAMA_3_8B_CHAT_FP8),
         _TogetherLlm("meta-llama/Meta-Llama-3-8B-Instruct-Lite", LLAMA_3_8B_CHAT_FP4),
@@ -460,11 +458,8 @@ def _tools_models():
         # "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         # ), returns "FUNCTION" and the call as text
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-70b-instruct", LLAMA_31_70B_CHAT),
         _GroqLlm("llama-3.1-8b-instant", LLAMA_31_8B_CHAT_FP8),
-        _NvidiaLlm("meta/llama-3.1-8b-instruct", LLAMA_31_8B_CHAT),
         _GroqLlm("llama3-groq-70b-8192-tool-use-preview"),
         _GroqLlm("llama3-groq-8b-8192-tool-use-preview"),
     ]
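Note: the diff only touches the model catalog entries, not the _NvidiaLlm class itself. Below is a minimal sketch of what one of these entries presumably resolves to, assuming the suite's NVIDIA provider wraps the OpenAI-compatible API served at https://integrate.api.nvidia.com/v1; the class body, the LLAMA_31_70B_CHAT_FP8 label constant, and the chat helper are illustrative assumptions, not code taken from this commit.

# Minimal sketch, not the repo's actual _NvidiaLlm implementation.
# Assumptions: NVIDIA's hosted models are reachable through the OpenAI-compatible
# endpoint at https://integrate.api.nvidia.com/v1, authenticated via NVIDIA_API_KEY,
# and each catalog entry pairs an API model id with a benchmark display label.
import os

from openai import OpenAI

LLAMA_31_70B_CHAT_FP8 = "llama-3.1-70b-chat-fp8"  # hypothetical label constant


class _NvidiaLlm:
    def __init__(self, model: str, display_name: str | None = None):
        self.model = model
        self.display_name = display_name or model
        # NVIDIA's hosted inference is OpenAI-compatible, so the stock client works.
        self._client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=os.environ["NVIDIA_API_KEY"],
        )

    def chat(self, prompt: str) -> str:
        # Single non-streaming request; the real benchmark presumably streams
        # tokens to measure time-to-first-token and throughput.
        resp = self._client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        return resp.choices[0].message.content or ""


# Example use with one of the entries updated in this commit:
# _NvidiaLlm("meta/llama-3.1-70b-instruct-turbo", LLAMA_31_70B_CHAT_FP8).chat("Hi")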
