Skip to content

Commit 2892810

Browse files
committed
feat(model/llm): add glm-4.1v model
1 parent 39fd59e commit 2892810

File tree

2 files changed

+24
-14
lines changed

2 files changed

+24
-14
lines changed

xinference/model/llm/llm_family.json

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18251,7 +18251,7 @@
1825118251
"reasoning_end_tag": "</think>"
1825218252
},
1825318253
{
18254-
"version": 1,
18254+
"version": 2,
1825518255
"context_length": 65536,
1825618256
"model_name": "glm-4.1v",
1825718257
"model_lang": [
@@ -18267,11 +18267,22 @@
1826718267
{
1826818268
"model_format": "pytorch",
1826918269
"model_size_in_billions": 9,
18270-
"quantizations": [
18271-
"none"
18272-
],
18273-
"model_revision": "master",
18274-
"model_id": "ZhipuAI/GLM-4.1V-9B-Base"
18270+
"model_src": {
18271+
"huggingface": {
18272+
"quantizations": [
18273+
"none"
18274+
],
18275+
"model_id": "THUDM/GLM-4.1V-9B-Base",
18276+
"model_revision": "34507daeedba84517747844915f08f191521a83a"
18277+
},
18278+
"modelscope": {
18279+
"quantizations": [
18280+
"none"
18281+
],
18282+
"model_id": "ZhipuAI/GLM-4.1V-9B-Base",
18283+
"model_revision": "master"
18284+
}
18285+
}
1827518286
}
1827618287
],
1827718288
"chat_template": "",

xinference/model/llm/transformers/multimodal/glm4_1v.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from .....core.model import register_batching_multimodal_models
2323
from .....core.scheduler import InferenceRequest
2424
from .....model.utils import select_device
25-
from ...llm_family import LLMFamilyV1, LLMSpecV1, register_transformer
25+
from ...llm_family import LLMFamilyV2, LLMSpecV1, register_transformer
2626
from ...utils import _decode_image
2727
from ..core import register_non_default_model
2828
from ..utils import get_max_src_len
@@ -37,7 +37,7 @@
3737
class Glm4_1VModel(PytorchMultiModalModel):
3838
@classmethod
3939
def match_json(
40-
cls, model_family: "LLMFamilyV1", model_spec: "LLMSpecV1", quantization: str
40+
cls, model_family: "LLMFamilyV2", model_spec: "LLMSpecV1", quantization: str
4141
) -> bool:
4242
family = model_family.model_family or model_family.model_name
4343
if "glm-4.1v" in family.lower():
@@ -56,17 +56,16 @@ def load_processor(self):
5656
)
5757

5858
def load_multimodal_model(self):
59-
from transformers import AutoModel
60-
from transformers import Glm4vConfig
59+
from transformers import AutoModel, Glm4vConfig
6160

6261
kwargs = {"device_map": self._device}
6362
kwargs = self.apply_bnb_quantization(kwargs)
6463

6564
model = AutoModel.from_pretrained(
66-
self.model_path,
67-
trust_remote_code=True,
68-
**kwargs,
69-
)
65+
self.model_path,
66+
trust_remote_code=True,
67+
**kwargs,
68+
)
7069
self._model = model.eval()
7170
# Specify hyperparameters for generation
7271
self._model.generation_config = Glm4vConfig.from_pretrained(

0 commit comments

Comments (0)