diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index 7c8da51d1b82ff..035d9881ebcaa7 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index 7a7b4b0892785e..c3ee76141d9a57 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index 09ad842801eb9d..1926db7ac384aa 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 8192
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index aee82a0602a995..e54b5de4a12032 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
index 40ff7609c7a2e2..724fe48909b1fb 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flashx.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index 791a77ba157cf7..fa5b1e1fe9d7cf 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index 13ed1e49c99a2a..e1eb13df3db438 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index badcee22db77b1..c0c4e04d378e43 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -8,7 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
-  context_size: 10240
+  context_size: 1048576
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index e2f785e1bc4383..c4f26f8ba93130 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -8,6 +8,7 @@ features:
   - stream-tool-call
 model_properties:
   mode: chat
+  context_size: 131072
 parameter_rules:
   - name: temperature
     use_template: temperature
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 3baa298300a8e1..0d99f89cb80d00 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -4,6 +4,7 @@ label:
 model_type: llm
 model_properties:
   mode: chat
+  context_size: 2048
 features:
   - vision
 parameter_rules:
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index dbda18b8888feb..5cd0e16b0eef7b 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -4,6 +4,7 @@ label:
 model_type: llm
 model_properties:
   mode: chat
+  context_size: 8192
 features:
   - vision
   - video
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
index eddb94aba35a93..e0601d681cbf74 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/llm.py
@@ -22,18 +22,6 @@
 from core.model_runtime.model_providers.zhipuai._common import _CommonZhipuaiAI
 from core.model_runtime.utils import helper
 
-GLM_JSON_MODE_PROMPT = """You should always follow the instructions and output a valid JSON object.
-The structure of the JSON object you can found in the instructions, use {"answer": "$your_answer"} as the default structure
-if you are not sure about the structure.
-
-And you should always end the block with a "```" to indicate the end of the JSON object.
-
-
-{{instructions}}
-
-
-```JSON"""  # noqa: E501
-
 
 class ZhipuAILargeLanguageModel(_CommonZhipuaiAI, LargeLanguageModel):
     def _invoke(
@@ -64,42 +52,8 @@ def _invoke(
         credentials_kwargs = self._to_credential_kwargs(credentials)
 
         # invoke model
-        # stop = stop or []
-        # self._transform_json_prompts(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
         return self._generate(model, credentials_kwargs, prompt_messages, model_parameters, tools, stop, stream, user)
 
-    # def _transform_json_prompts(self, model: str, credentials: dict,
-    #                             prompt_messages: list[PromptMessage], model_parameters: dict,
-    #                             tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
-    #                             stream: bool = True, user: str | None = None) \
-    #         -> None:
-    #     """
-    #     Transform json prompts to model prompts
-    #     """
-    #     if "}\n\n" not in stop:
-    #         stop.append("}\n\n")
-    #     # check if there is a system message
-    #     if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
-    #         # override the system message
-    #         prompt_messages[0] = SystemPromptMessage(
-    #             content=GLM_JSON_MODE_PROMPT.replace("{{instructions}}", prompt_messages[0].content)
-    #         )
-    #     else:
-    #         # insert the system message
-    #         prompt_messages.insert(0, SystemPromptMessage(
-    #             content=GLM_JSON_MODE_PROMPT.replace("{{instructions}}", "Please output a valid JSON object.")
-    #         ))
-    #     # check if the last message is a user message
-    #     if len(prompt_messages) > 0 and isinstance(prompt_messages[-1], UserPromptMessage):
-    #         # add ```JSON\n to the last message
-    #         prompt_messages[-1].content += "\n```JSON\n"
-    #     else:
-    #         # append a user message
-    #         prompt_messages.append(UserPromptMessage(
-    #             content="```JSON\n"
-    #         ))
-
     def get_num_tokens(
         self,
         model: str,
@@ -170,7 +124,7 @@ def _generate(
         :return: full response or stream response chunk generator result
         """
         extra_model_kwargs = {}
-        # request to glm-4v-plus with stop words will always response "finish_reason":"network_error"
+        # request to glm-4v-plus with stop words will always respond "finish_reason":"network_error"
         if stop and model != "glm-4v-plus":
             extra_model_kwargs["stop"] = stop
 
@@ -186,7 +140,7 @@ def _generate(
         # resolve zhipuai model not support system message and user message, assistant message must be in sequence
         new_prompt_messages: list[PromptMessage] = []
         for prompt_message in prompt_messages:
-            copy_prompt_message = prompt_message.copy()
+            copy_prompt_message = prompt_message.model_copy()
             if copy_prompt_message.role in {PromptMessageRole.USER, PromptMessageRole.SYSTEM, PromptMessageRole.TOOL}:
                 if isinstance(copy_prompt_message.content, list):
                     # check if model is 'glm-4v'
@@ -238,59 +192,38 @@ def _generate(
                 params = self._construct_glm_4v_parameter(model, new_prompt_messages, model_parameters)
         else:
             params = {"model": model, "messages": [], **model_parameters}
-            # glm model
-            if not model.startswith("chatglm"):
-                for prompt_message in new_prompt_messages:
-                    if prompt_message.role == PromptMessageRole.TOOL:
+            for prompt_message in new_prompt_messages:
+                if prompt_message.role == PromptMessageRole.TOOL:
+                    params["messages"].append(
+                        {
+                            "role": "tool",
+                            "content": prompt_message.content,
+                            "tool_call_id": prompt_message.tool_call_id,
+                        }
+                    )
+                elif isinstance(prompt_message, AssistantPromptMessage):
+                    if prompt_message.tool_calls:
                         params["messages"].append(
                             {
-                                "role": "tool",
+                                "role": "assistant",
                                 "content": prompt_message.content,
-                                "tool_call_id": prompt_message.tool_call_id,
+                                "tool_calls": [
+                                    {
+                                        "id": tool_call.id,
+                                        "type": tool_call.type,
+                                        "function": {
+                                            "name": tool_call.function.name,
+                                            "arguments": tool_call.function.arguments,
+                                        },
+                                    }
+                                    for tool_call in prompt_message.tool_calls
+                                ],
                             }
                         )
-                    elif isinstance(prompt_message, AssistantPromptMessage):
-                        if prompt_message.tool_calls:
-                            params["messages"].append(
-                                {
-                                    "role": "assistant",
-                                    "content": prompt_message.content,
-                                    "tool_calls": [
-                                        {
-                                            "id": tool_call.id,
-                                            "type": tool_call.type,
-                                            "function": {
-                                                "name": tool_call.function.name,
-                                                "arguments": tool_call.function.arguments,
-                                            },
-                                        }
-                                        for tool_call in prompt_message.tool_calls
-                                    ],
-                                }
-                            )
-                        else:
-                            params["messages"].append({"role": "assistant", "content": prompt_message.content})
                     else:
-                        params["messages"].append(
-                            {"role": prompt_message.role.value, "content": prompt_message.content}
-                        )
-            else:
-                # chatglm model
-                for prompt_message in new_prompt_messages:
-                    # merge system message to user message
-                    if prompt_message.role in {
-                        PromptMessageRole.SYSTEM,
-                        PromptMessageRole.TOOL,
-                        PromptMessageRole.USER,
-                    }:
-                        if len(params["messages"]) > 0 and params["messages"][-1]["role"] == "user":
-                            params["messages"][-1]["content"] += "\n\n" + prompt_message.content
-                        else:
-                            params["messages"].append({"role": "user", "content": prompt_message.content})
-                    else:
-                        params["messages"].append(
-                            {"role": prompt_message.role.value, "content": prompt_message.content}
-                        )
+                        params["messages"].append({"role": "assistant", "content": prompt_message.content})
+                else:
+                    params["messages"].append({"role": prompt_message.role.value, "content": prompt_message.content})
 
         if tools and len(tools) > 0:
             params["tools"] = [{"type": "function", "function": helper.dump_model(tool)} for tool in tools]
@@ -406,7 +339,7 @@ def _handle_generate_stream_response(
         Handle llm stream response
 
         :param model: model name
-        :param response: response
+        :param responses: response
         :param prompt_messages: prompt messages
         :return: llm response chunk generator result
         """
@@ -505,7 +438,7 @@ def _convert_messages_to_prompt(
         if tools and len(tools) > 0:
             text += "\n\nTools:"
             for tool in tools:
-                text += f"\n{tool.json()}"
+                text += f"\n{tool.model_dump_json()}"
 
         # trim off the trailing ' ' that might come from the "Assistant: "
         return text.rstrip()