Skip to content

Commit c85c354

Browse files
OliverBryant, qinxuye, github-actions[bot]
authored
chore: sync model "FLUX.2-dev" JSON (#4292)
Co-authored-by: qinxuye <qinxuye@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 66de4e0 commit c85c354

File tree

13 files changed

+954
-44
lines changed

13 files changed

+954
-44
lines changed

doc/source/gen_docs.py

Lines changed: 88 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -98,44 +98,95 @@ def mock_engine_libraries():
9898
# Mock platform checks BEFORE importing xinference modules
9999
def mock_platform_checks():
100100
"""Mock platform and hardware checks for documentation generation"""
101-
from unittest.mock import patch
102-
import sys
103-
import platform
104-
105-
# Mock platform system for MLX (make it appear as Apple Silicon)
106-
sys.platform = "darwin"
107-
platform.system = lambda: "Darwin"
108-
platform.processor = lambda: "arm"
109-
110-
# Mock vLLM platform checks
111-
import xinference.model.llm.vllm.core as vllm_core
112-
vllm_core.VLLMModel._is_linux = lambda: True
113-
vllm_core.VLLMModel._has_cuda_device = lambda: True
114-
vllm_core.VLLMChatModel._is_linux = lambda: True
115-
vllm_core.VLLMChatModel._has_cuda_device = lambda: True
116-
vllm_core.VLLMMultiModel._is_linux = lambda: True
117-
vllm_core.VLLMMultiModel._has_cuda_device = lambda: True
118-
119-
# Mock SGLang platform checks if available
101+
# Import and mock engine checks without modifying system-wide platform settings
120102
try:
121-
import xinference.model.llm.sglang.core as sglang_core
122-
sglang_core.SGLANGModel._is_linux = lambda: True
123-
sglang_core.SGLANGModel._has_cuda_device = lambda: True
124-
sglang_core.SGLANGChatModel._is_linux = lambda: True
125-
sglang_core.SGLANGChatModel._has_cuda_device = lambda: True
126-
sglang_core.SGLANGVisionModel._is_linux = lambda: True
127-
sglang_core.SGLANGVisionModel._has_cuda_device = lambda: True
128-
except ImportError:
129-
pass
130-
131-
# Mock LMDEPLOY platform checks if available
132-
try:
133-
import xinference.model.llm.lmdeploy.core as lmdeploy_core
134-
lmdeploy_core.LMDeployModel._is_linux = lambda: True
135-
lmdeploy_core.LMDeployModel._has_cuda_device = lambda: True
136-
lmdeploy_core.LMDeployChatModel._is_linux = lambda: True
137-
lmdeploy_core.LMDeployChatModel._has_cuda_device = lambda: True
138-
except ImportError:
103+
# Mock vLLM platform checks
104+
import xinference.model.llm.vllm.core as vllm_core
105+
vllm_core.VLLMModel._is_linux = lambda: True
106+
vllm_core.VLLMModel._has_cuda_device = lambda: True
107+
vllm_core.VLLMChatModel._is_linux = lambda: True
108+
vllm_core.VLLMChatModel._has_cuda_device = lambda: True
109+
vllm_core.VLLMMultiModel._is_linux = lambda: True
110+
vllm_core.VLLMMultiModel._has_cuda_device = lambda: True
111+
112+
# Mock SGLang platform checks if available
113+
try:
114+
import xinference.model.llm.sglang.core as sglang_core
115+
sglang_core.SGLANGModel._is_linux = lambda: True
116+
sglang_core.SGLANGModel._has_cuda_device = lambda: True
117+
sglang_core.SGLANGChatModel._is_linux = lambda: True
118+
sglang_core.SGLANGChatModel._has_cuda_device = lambda: True
119+
sglang_core.SGLANGVisionModel._is_linux = lambda: True
120+
sglang_core.SGLANGVisionModel._has_cuda_device = lambda: True
121+
except ImportError:
122+
pass
123+
124+
# Mock LMDEPLOY platform checks if available
125+
try:
126+
import xinference.model.llm.lmdeploy.core as lmdeploy_core
127+
lmdeploy_core.LMDeployModel._is_linux = lambda: True
128+
lmdeploy_core.LMDeployModel._has_cuda_device = lambda: True
129+
lmdeploy_core.LMDeployChatModel._is_linux = lambda: True
130+
lmdeploy_core.LMDeployChatModel._has_cuda_device = lambda: True
131+
except ImportError:
132+
pass
133+
134+
# Mock MLX engine platform checks by monkey-patching the imports within MLX module
135+
try:
136+
# First, let's monkey-patch sys and platform imports within the MLX module only
137+
import xinference.model.llm.mlx.core as mlx_core
138+
139+
# Create mock objects that look like sys.platform and platform functions
140+
class MockSys:
141+
platform = "darwin"
142+
143+
class MockPlatform:
144+
@staticmethod
145+
def system():
146+
return "Darwin"
147+
148+
@staticmethod
149+
def processor():
150+
return "arm"
151+
152+
# Store original references
153+
original_mlx_match = mlx_core.MLXModel.match_json
154+
original_mlx_chat_match = mlx_core.MLXChatModel.match_json
155+
original_mlx_vision_match = mlx_core.MLXVisionModel.match_json
156+
157+
# Now create wrapper functions that replace sys and platform only during the platform check
158+
def create_wrapped_match_json(original_match):
159+
def wrapped_match_json(cls, llm_family, llm_spec, quantization):
160+
# Temporarily replace sys and platform in the MLX module
161+
import sys as original_sys
162+
import platform as original_platform
163+
164+
# Replace sys and platform temporarily
165+
mlx_core.sys = MockSys()
166+
mlx_core.platform = MockPlatform()
167+
168+
try:
169+
# Call the original match_json which will now see the mocked platform
170+
result = original_match.__func__(cls, llm_family, llm_spec, quantization)
171+
return result
172+
finally:
173+
# Restore original sys and platform
174+
mlx_core.sys = original_sys
175+
mlx_core.platform = original_platform
176+
177+
return classmethod(wrapped_match_json)
178+
179+
# Apply the wrapped match_json methods
180+
mlx_core.MLXModel.match_json = create_wrapped_match_json(original_mlx_match)
181+
mlx_core.MLXChatModel.match_json = create_wrapped_match_json(original_mlx_chat_match)
182+
mlx_core.MLXVisionModel.match_json = create_wrapped_match_json(original_mlx_vision_match)
183+
184+
except ImportError:
185+
pass
186+
187+
except Exception as e:
188+
# If any mocking fails, continue without it
189+
print(f"Warning: Could not mock some engine platform checks: {e}")
139190
pass
140191

141192
mock_platform_checks()

doc/source/models/builtin/embedding/index.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ The following is a list of built-in embedding models in Xinference:
4141

4242
e5-large-v2
4343

44+
gme-qwen2-vl-2b-instruct
45+
46+
gme-qwen2-vl-7b-instruct
47+
4448
gte-base
4549

4650
gte-large
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
.. _models_builtin_flux.2-dev:
2+
3+
==========
4+
FLUX.2-dev
5+
==========
6+
7+
- **Model Name:** FLUX.2-dev
8+
- **Model Family:** stable_diffusion
9+
- **Abilities:** text2image, image2image, inpainting
10+
- **Available ControlNet:** None
11+
12+
Specifications
13+
^^^^^^^^^^^^^^
14+
15+
- **Model ID:** black-forest-labs/FLUX.2-dev
16+
- **GGUF Model ID**: city96/FLUX.2-dev-gguf
17+
- **GGUF Quantizations**: BF16, Q2_K, Q3_K_M, Q3_K_S, Q4_0, Q4_1, Q4_K_M, Q4_K_S, Q5_0, Q5_1, Q5_K_M, Q5_K_S, Q6_K, Q8_0
18+
19+
20+
Execute the following command to launch the model::
21+
22+
xinference launch --model-name FLUX.2-dev --model-type image
23+
24+
25+
For GGUF quantization, use the command below::
26+
27+
xinference launch --model-name FLUX.2-dev --model-type image --gguf_quantization ${gguf_quantization} --cpu_offload True
28+
29+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
.. _models_builtin_hunyuanocr:
2+
3+
==========
4+
HunyuanOCR
5+
==========
6+
7+
- **Model Name:** HunyuanOCR
8+
- **Model Family:** ocr
9+
- **Abilities:** ocr
10+
- **Available ControlNet:** None
11+
12+
Specifications
13+
^^^^^^^^^^^^^^
14+
15+
- **Model ID:** tencent/HunyuanOCR
16+
17+
Execute the following command to launch the model::
18+
19+
xinference launch --model-name HunyuanOCR --model-type image
20+
21+
22+

doc/source/models/builtin/image/index.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,24 @@ The following is a list of built-in image models in Xinference:
1313

1414
cogview4
1515

16+
deepseek-ocr
17+
1618
flux.1-dev
1719

1820
flux.1-kontext-dev
1921

2022
flux.1-schnell
2123

24+
flux.2-dev
25+
2226
got-ocr2_0
2327

2428
hunyuandit-v1.2
2529

2630
hunyuandit-v1.2-distilled
2731

32+
hunyuanocr
33+
2834
kolors
2935

3036
qwen-image

doc/source/models/builtin/llm/index.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,11 @@ The following is a list of built-in LLM in Xinference:
366366
- 32768
367367
- MiniCPM4 series are highly efficient large language models (LLMs) designed explicitly for end-side devices, which achieve this efficiency through systematic innovation in four key dimensions: model architecture, training data, training algorithms, and inference systems.
368368

369+
* - :ref:`minimax-m2 <models_llm_minimax-m2>`
370+
- chat, tools, reasoning
371+
- 196608
372+
- MiniMax-M2, a Mini model built for Max coding & agentic workflows.
373+
369374
* - :ref:`mistral-instruct-v0.1 <models_llm_mistral-instruct-v0.1>`
370375
- chat
371376
- 8192
@@ -534,7 +539,7 @@ The following is a list of built-in LLM in Xinference:
534539
* - :ref:`qwen3 <models_llm_qwen3>`
535540
- chat, reasoning, hybrid, tools
536541
- 40960
537-
- Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support
542+
- Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support.
538543

539544
* - :ref:`qwen3-coder <models_llm_qwen3-coder>`
540545
- chat, tools
@@ -846,6 +851,8 @@ The following is a list of built-in LLM in Xinference:
846851

847852
minicpm4
848853

854+
minimax-m2
855+
849856
mistral-instruct-v0.1
850857

851858
mistral-instruct-v0.2

doc/source/models/builtin/llm/minimax-m2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Model Spec 3 (mlx, 230 Billion)
5252
- **Model Format:** mlx
5353
- **Model Size (in billions):** 230
5454
- **Quantizations:** 3bit, 4bit, 5bit, 6bit, 8bit
55-
- **Engines**:
55+
- **Engines**: MLX
5656
- **Model ID:** mlx-community/MiniMax-M2-{quantization}
5757
- **Model Hubs**: `Hugging Face <https://huggingface.co/mlx-community/MiniMax-M2-{quantization}>`__, `ModelScope <https://modelscope.cn/models/mlx-community/MiniMax-M2-{quantization}>`__
5858

0 commit comments

Comments
 (0)