Add Yi Handler #543

Merged 15 commits on Jul 26, 2024
2 changes: 2 additions & 0 deletions berkeley-function-call-leaderboard/README.md
@@ -59,6 +59,7 @@ export FIRE_WORKS_API_KEY=XXXXXX
export ANTHROPIC_API_KEY=XXXXXX
export COHERE_API_KEY=XXXXXX
export NVIDIA_API_KEY=nvapi-XXXXXX
export YI_API_KEY=XXXXXX
```

If you decide to run an OSS model, the generation script uses vLLM and therefore requires a GPU for hosting and inference. If you have questions or concerns about evaluating OSS models, please reach out to us in our [discord channel](https://discord.gg/grXXvj9Whz).
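For reference, a minimal run might look like the following sketch (the script name and flags are assumptions based on the repository layout; check the usage section for the exact invocation):

```bash
# Hypothetical invocation; the exact script name and flags may differ.
export YI_API_KEY=XXXXXX
python openfunctions_evaluation.py --model yi-large-fc --test-category all
```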
@@ -116,6 +117,7 @@ Below is *a table of models we support* to run our leaderboard evaluation against
|nvidia/nemotron-4-340b-instruct| Prompt|
|THUDM/glm-4-9b-chat 💻| Function Calling|
|ibm-granite/granite-20b-functioncalling 💻| Function Calling|
|yi-large-fc| Function Calling|

Here {MODEL} 💻 means the model needs to be hosted locally and called via vLLM, while {MODEL} means the model is accessed through API calls. A trailing `-FC` indicates that the model supports native function calling. You can check out the table summarizing feature support across models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt).

@@ -396,6 +396,12 @@
"THUDM",
"glm-4",
],
"yi-large-fc": [
"yi-large (FC)",
"https://platform.01.ai/",
"01.AI",
"Proprietary",
],
}

INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -437,6 +443,7 @@
"command-r-plus": 3,
"command-r-plus-FC-optimized": 3,
"command-r-plus-optimized": 3,
"yi-large-fc": 3,
}

OUTPUT_PRICE_PER_MILLION_TOKEN = {
@@ -478,6 +485,7 @@
"command-r-plus": 15,
"command-r-plus-FC-optimized": 15,
"command-r-plus-optimized": 15,
"yi-large-fc": 3,
}
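# A minimal sketch (not part of this PR) of how these per-million-token price
# tables can be combined with token counts to estimate a request's cost;
# `model`, `input_tokens`, and `output_tokens` are hypothetical values:
#
#   cost = (
#       input_tokens * INPUT_PRICE_PER_MILLION_TOKEN[model]
#       + output_tokens * OUTPUT_PRICE_PER_MILLION_TOKEN[model]
#   ) / 1_000_000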

# The latency of the open-source models is hardcoded here.
@@ -147,6 +147,7 @@
"command-r-plus-FC-optimized",
"THUDM/glm-4-9b-chat",
"ibm-granite/granite-20b-functioncalling",
"yi-large-fc",
]

TEST_CATEGORIES = {
@@ -19,6 +19,8 @@
from model_handler.granite_handler import GraniteHandler
from model_handler.nvidia_handler import NvidiaHandler
from model_handler.glm_handler import GLMHandler
from model_handler.yi_handler import YiHandler


handler_map = {
"gorilla-openfunctions-v0": GorillaHandler,
@@ -80,5 +82,6 @@
"snowflake/arctic": ArcticHandler,
"ibm-granite/granite-20b-functioncalling": GraniteHandler,
"nvidia/nemotron-4-340b-instruct": NvidiaHandler,
"THUDM/glm-4-9b-chat": GLMHandler
"THUDM/glm-4-9b-chat": GLMHandler,
"yi-large-fc": YiHandler,
}
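With this entry in place, the leaderboard can resolve the model name to the new handler. A minimal sketch (assuming `handler_map` is importable from `model_handler.handler_map`, and that `YI_API_KEY` is set, since the OpenAI client is created in `__init__`):

```python
from model_handler.handler_map import handler_map

handler_cls = handler_map["yi-large-fc"]  # -> YiHandler
handler = handler_cls("yi-large-fc", temperature=0.0, top_p=1, max_tokens=1000)
```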
81 changes: 81 additions & 0 deletions berkeley-function-call-leaderboard/model_handler/yi_handler.py
@@ -0,0 +1,81 @@
from model_handler.handler import BaseHandler
from model_handler.model_style import ModelStyle
from model_handler.utils import (
convert_to_tool,
convert_to_function_call,
augment_prompt_by_languge,
language_specific_pre_processing,
)
from model_handler.constant import GORILLA_TO_OPENAPI
from openai import OpenAI
import json
import os
import time


class YiHandler(BaseHandler):
def __init__(self, model_name, temperature=0.0, top_p=1, max_tokens=1000) -> None:
super().__init__(model_name, temperature, top_p, max_tokens)
self.model_style = ModelStyle.OpenAI
self.base_url = "https://api.lingyiwanwu.com/v1"
self.client = OpenAI(base_url=self.base_url, api_key=os.getenv("YI_API_KEY"))

def inference(self, prompt, functions, test_category):
prompt = augment_prompt_by_languge(prompt, test_category)
functions = language_specific_pre_processing(functions, test_category)
        if not isinstance(functions, list):
            functions = [functions]

message = [{"role": "user", "content": "Questions:" + prompt}]
oai_tool = convert_to_tool(
functions, GORILLA_TO_OPENAPI, self.model_style, test_category
)
start_time = time.time()
if len(oai_tool) > 0:
response = self.client.chat.completions.create(
messages=message,
model=self.model_name,
temperature=self.temperature,
max_tokens=self.max_tokens,
top_p=self.top_p,
tools=oai_tool,
)
else:
response = self.client.chat.completions.create(
messages=message,
model=self.model_name,
temperature=self.temperature,
max_tokens=self.max_tokens,
top_p=self.top_p,
)
latency = time.time() - start_time
        try:
            # Collect {function_name: raw JSON argument string} for each tool call.
            result = [
                {func_call.function.name: func_call.function.arguments}
                for func_call in response.choices[0].message.tool_calls
            ]
        except Exception:
            # No tool calls were returned; fall back to the plain text content.
            result = response.choices[0].message.content

metadata = {}
metadata["input_tokens"] = response.usage.prompt_tokens
metadata["output_tokens"] = response.usage.completion_tokens
metadata["latency"] = latency
return result, metadata

    def decode_ast(self, result, language="Python"):
decoded_output = []
for invoked_function in result:
name = list(invoked_function.keys())[0]
params = json.loads(invoked_function[name])
            if language != "Python":
                # All JSON values are cast to strings for Java and JavaScript.
                for key in params:
                    params[key] = str(params[key])
decoded_output.append({name: params})

return decoded_output

    def decode_execute(self, result):
function_call = convert_to_function_call(result)
return function_call
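As a minimal illustration (not part of the PR) of what `decode_ast` produces, given a hypothetical tool-call result in the shape returned by `inference` (this assumes `YI_API_KEY` is set, since the OpenAI client is created in `__init__`):

```python
# One tool call whose arguments arrive as a JSON-encoded string.
result = [{"get_weather": '{"city": "Berkeley", "days": 3}'}]

handler = YiHandler("yi-large-fc")
print(handler.decode_ast(result, language="Python"))
# [{'get_weather': {'city': 'Berkeley', 'days': 3}}]

print(handler.decode_ast(result, language="Java"))
# [{'get_weather': {'city': 'Berkeley', 'days': '3'}}]
```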