Skip to content

Commit 98a3c8a

Browse files
Jun-Howie and qinxuye authored
FEAT: [model] Support Qwen3-Next (#4113)
Co-authored-by: qinxuye <qinxuye@gmail.com>
1 parent bc3b42c commit 98a3c8a

File tree

3 files changed

+231
-2
lines changed

3 files changed

+231
-2
lines changed

xinference/model/llm/llm_family.json

Lines changed: 227 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21699,7 +21699,8 @@
2169921699
"stop": [
2170021700
"<|endoftext|>",
2170121701
"<|im_end|>"
21702-
]
21702+
],
21703+
"tool_parser":"qwen"
2170321704
},
2170421705
{
2170521706
"version": 2,
@@ -21785,6 +21786,230 @@
2178521786
"<|im_end|>"
2178621787
],
2178721788
"reasoning_start_tag": "<think>",
21788-
"reasoning_end_tag": "</think>"
21789+
"reasoning_end_tag": "</think>",
21790+
"tool_parser":"qwen"
21791+
},
21792+
{
21793+
"version": 2,
21794+
"context_length": 262144,
21795+
"model_name": "Qwen3-Next-Instruct",
21796+
"model_lang": [
21797+
"en",
21798+
"zh"
21799+
],
21800+
"model_ability": [
21801+
"chat",
21802+
"tools"
21803+
],
21804+
"model_description": "Qwen3-Next-80B-A3B is the first installment in the Qwen3-Next series",
21805+
"model_specs": [
21806+
{
21807+
"model_format": "pytorch",
21808+
"model_size_in_billions": 80,
21809+
"activated_size_in_billions": 3,
21810+
"model_src": {
21811+
"huggingface": {
21812+
"quantizations": [
21813+
"none"
21814+
],
21815+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct"
21816+
},
21817+
"modelscope": {
21818+
"quantizations": [
21819+
"none"
21820+
],
21821+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct"
21822+
}
21823+
}
21824+
},
21825+
{
21826+
"model_format": "fp8",
21827+
"model_size_in_billions": 80,
21828+
"activated_size_in_billions": 3,
21829+
"model_src": {
21830+
"huggingface": {
21831+
"quantizations": [
21832+
"fp8"
21833+
],
21834+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
21835+
},
21836+
"modelscope": {
21837+
"quantizations": [
21838+
"fp8"
21839+
],
21840+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8"
21841+
}
21842+
}
21843+
},
21844+
{
21845+
"model_format": "awq",
21846+
"model_size_in_billions": 80,
21847+
"activated_size_in_billions": 3,
21848+
"model_src": {
21849+
"huggingface": {
21850+
"quantizations": [
21851+
"4bit",
21852+
"8bit"
21853+
],
21854+
"model_id": "cpatonn/Qwen3-Next-80B-A3B-Instruct-AWQ-{quantization}"
21855+
},
21856+
"modelscope": {
21857+
"quantizations": [
21858+
"4bit",
21859+
"8bit"
21860+
],
21861+
"model_id": "cpatonn-mirror/Qwen3-Next-80B-A3B-Instruct-AWQ-{quantization}"
21862+
}
21863+
}
21864+
},
21865+
{
21866+
"model_format": "mlx",
21867+
"model_size_in_billions": 80,
21868+
"activated_size_in_billions": 3,
21869+
"model_src": {
21870+
"huggingface": {
21871+
"quantizations": [
21872+
"4bit",
21873+
"5bit",
21874+
"6bit",
21875+
"8bit"
21876+
],
21877+
"model_id": "mlx-community/Qwen3-Next-80B-A3B-Instruct-{quantization}"
21878+
},
21879+
"modelscope": {
21880+
"quantizations": [
21881+
"4bit",
21882+
"5bit",
21883+
"6bit",
21884+
"8bit"
21885+
],
21886+
"model_id": "mlx-community/Qwen3-Next-80B-A3B-Instruct-{quantization}"
21887+
}
21888+
}
21889+
}
21890+
],
21891+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- 
endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}",
21892+
"stop_token_ids": [
21893+
151643,
21894+
151645
21895+
],
21896+
"stop": [
21897+
"<|endoftext|>",
21898+
"<|im_end|>"
21899+
],
21900+
"tool_parser":"qwen"
21901+
},
21902+
{
21903+
"version": 2,
21904+
"context_length": 262144,
21905+
"model_name": "Qwen3-Next-Thinking",
21906+
"model_lang": [
21907+
"en",
21908+
"zh"
21909+
],
21910+
"model_ability": [
21911+
"chat",
21912+
"reasoning",
21913+
"tools"
21914+
],
21915+
"model_description": "Qwen3-Next-80B-A3B is the first installment in the Qwen3-Next series",
21916+
"model_specs": [
21917+
{
21918+
"model_format": "pytorch",
21919+
"model_size_in_billions": 80,
21920+
"activated_size_in_billions": 3,
21921+
"model_src": {
21922+
"huggingface": {
21923+
"quantizations": [
21924+
"none"
21925+
],
21926+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking"
21927+
},
21928+
"modelscope": {
21929+
"quantizations": [
21930+
"none"
21931+
],
21932+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking"
21933+
}
21934+
}
21935+
},
21936+
{
21937+
"model_format": "fp8",
21938+
"model_size_in_billions": 80,
21939+
"activated_size_in_billions": 3,
21940+
"model_src": {
21941+
"huggingface": {
21942+
"quantizations": [
21943+
"fp8"
21944+
],
21945+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8"
21946+
},
21947+
"modelscope": {
21948+
"quantizations": [
21949+
"fp8"
21950+
],
21951+
"model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking-FP8"
21952+
}
21953+
}
21954+
},
21955+
{
21956+
"model_format": "awq",
21957+
"model_size_in_billions": 80,
21958+
"activated_size_in_billions": 3,
21959+
"model_src": {
21960+
"huggingface": {
21961+
"quantizations": [
21962+
"4bit",
21963+
"8bit"
21964+
],
21965+
"model_id": "cpatonn/Qwen3-Next-80B-A3B-Thinking-AWQ-{quantization}"
21966+
},
21967+
"modelscope": {
21968+
"quantizations": [
21969+
"4bit",
21970+
"8bit"
21971+
],
21972+
"model_id": "cpatonn-mirror/Qwen3-Next-80B-A3B-Thinking-AWQ-{quantization}"
21973+
}
21974+
}
21975+
},
21976+
{
21977+
"model_format": "mlx",
21978+
"model_size_in_billions": 80,
21979+
"activated_size_in_billions": 3,
21980+
"model_src": {
21981+
"huggingface": {
21982+
"quantizations": [
21983+
"4bit",
21984+
"5bit",
21985+
"6bit",
21986+
"8bit"
21987+
],
21988+
"model_id": "mlx-community/Qwen3-Next-80B-A3B-Thinking-{quantization}"
21989+
},
21990+
"modelscope": {
21991+
"quantizations": [
21992+
"4bit",
21993+
"5bit",
21994+
"6bit",
21995+
"8bit"
21996+
],
21997+
"model_id": "mlx-community/Qwen3-Next-80B-A3B-Thinking-{quantization}"
21998+
}
21999+
}
22000+
}
22001+
],
22002+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}",
22003+
"stop_token_ids": [
22004+
151643,
22005+
151645
22006+
],
22007+
"stop": [
22008+
"<|endoftext|>",
22009+
"<|im_end|>"
22010+
],
22011+
"reasoning_start_tag": "<think>",
22012+
"reasoning_end_tag": "</think>",
22013+
"tool_parser":"qwen"
2178922014
}
2179022015
]

xinference/model/llm/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@
7373
"Qwen3-Coder",
7474
"Qwen3-VL-Instruct",
7575
"Qwen3-VL-Thinking",
76+
"Qwen3-Next-Instruct",
77+
"Qwen3-Next-Thinking",
7678
]
7779

7880
GLM4_TOOL_CALL_FAMILY = [

xinference/model/llm/vllm/core.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ class VLLMGenerateConfig(TypedDict, total=False):
288288

289289
if VLLM_INSTALLED and VLLM_VERSION >= version.parse("0.10.2"):
290290
VLLM_SUPPORTED_CHAT_MODELS.append("seed-oss")
291+
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Instruct")
292+
VLLM_SUPPORTED_CHAT_MODELS.append("Qwen3-Next-Thinking")
291293

292294
if VLLM_INSTALLED and VLLM_VERSION > version.parse("0.10.2"):
293295
VLLM_SUPPORTED_VISION_MODEL_LIST.append("Qwen3-VL-Instruct")

0 commit comments

Comments (0)