Fix post encode #2643

Merged: 10 commits, Dec 12, 2024
2 changes: 1 addition & 1 deletion docs/source/Instruction/命令行参数.md
@@ -126,7 +126,7 @@
- 🔥freeze_llm: Freeze the LLM. Default is False. Applicable to full-parameter training and LoRA.
- 🔥freeze_vit: Freeze the ViT. Default is True. Applicable to full-parameter training and LoRA.
- 🔥freeze_aligner: Freeze the aligner. Default is True. Applicable to full-parameter training and LoRA.
- 🔥target_modules: Specify the LoRA modules; default is `all-linear`, which automatically finds linear layers except lm_head and attaches the tuner. This parameter is not limited to LoRA.
- 🔥target_modules: Specify the LoRA modules. Default is `all-linear`. The behavior differs between LLMs and multimodal LLMs: for an LLM, it automatically finds all linear layers except lm_head and attaches the tuner; for a multimodal LLM, the tuner is attached only to the LLM part by default, and this behavior can be controlled with `freeze_llm`, `freeze_vit`, and `freeze_aligner`. This parameter is not limited to LoRA.
- 🔥target_regex: Specify a regex expression for the LoRA modules. Default is `None`; if a value is passed, target_modules does not take effect. This parameter is not limited to LoRA.
- 🔥init_weights: The method used to initialize the weights. LoRA accepts `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`; Bone accepts `true`, `false`, `bat`. Default is `true`.
- modules_to_save: Modules of the original model that participate in training and are saved after the tuner has been attached. Default is `[]`. This parameter is not limited to LoRA.
2 changes: 1 addition & 1 deletion docs/source_en/Instruction/Command-line-parameters.md
@@ -127,7 +127,7 @@ Other important parameters:
- 🔥freeze_llm: Freeze the LLM. Default is False. Applicable to full-parameter training and LoRA.
- 🔥freeze_vit: Freeze the ViT. Default is True. Applicable to full-parameter training and LoRA.
- 🔥freeze_aligner: Freeze the aligner. Default is True. Applicable to full-parameter training and LoRA.
- 🔥target_modules: Specify the LoRA modules; default is `all-linear`, which automatically finds linear layers except lm_head and attaches the tuner. This parameter is not limited to LoRA.
- 🔥target_modules: Specify the LoRA modules. Default is `all-linear`. The behavior differs between LLMs and multimodal LLMs: for an LLM, it automatically finds all linear layers except lm_head and attaches the tuner; for a multimodal LLM, the tuner is attached only to the LLM part by default, and this behavior can be controlled with `freeze_llm`, `freeze_vit`, and `freeze_aligner` (see the sketch after this list). This parameter is not limited to LoRA.
- 🔥target_regex: Specify a regex expression for the LoRA modules. Default is `None`; if a value is provided, target_modules does not take effect. This parameter is not limited to LoRA.
- 🔥init_weights: The method used to initialize the tuner weights. For LoRA the accepted values are `true`, `false`, `guassian`, `pissa`, `pissa_niter_[number of iters]`; for Bone they are `true`, `false`, `bat`. Default is `true`.
- modules_to_save: Modules of the original model that participate in training and are saved after the tuner has been attached. Default is `[]`. This parameter is not limited to LoRA.
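For intuition, here is a rough, hypothetical sketch (not swift's actual implementation) of what `all-linear` implies for a multimodal model under the defaults `freeze_llm=False`, `freeze_vit=True`, `freeze_aligner=True`: only `nn.Linear` layers inside the language model, excluding `lm_head`, would receive a tuner. The `llm_prefix` name below is an assumption for illustration only.

```python
# Hypothetical sketch only: approximates the documented `all-linear` selection
# for a multimodal model with freeze_vit=True and freeze_aligner=True.
import torch.nn as nn

def collect_all_linear(model: nn.Module, llm_prefix: str = 'language_model.') -> list:
    """Names of Linear layers that would get a tuner under the defaults (illustrative)."""
    targets = []
    for name, module in model.named_modules():
        if not isinstance(module, nn.Linear):
            continue
        if 'lm_head' in name:                 # lm_head is always excluded
            continue
        if not name.startswith(llm_prefix):   # ViT and aligner stay frozen by default
            continue
        targets.append(name)
    return targets
```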
3 changes: 2 additions & 1 deletion swift/llm/argument/train_args.py
@@ -147,7 +147,8 @@ def __post_init__(self) -> None:
if self.lazy_tokenize is None:
self.lazy_tokenize = self.model_meta.is_multimodal and not self.streaming
logger.info(f'Setting args.lazy_tokenize: {self.lazy_tokenize}')
self.accelerator_config = {'dispatch_batches': False}
if getattr(self, 'accelerator_config', None) is None:
self.accelerator_config = {'dispatch_batches': False}
self.training_args = TrainerFactory.get_training_args(self)

self._add_version()
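The hunk above changes `__post_init__` so that the `dispatch_batches` default is applied only when the user has not set `accelerator_config` themselves. A minimal, self-contained sketch of the guard, using a hypothetical stand-in class rather than the real `TrainArguments`:

```python
# Minimal sketch of the guard: a user-supplied accelerator_config survives;
# the default is applied only when nothing was provided.
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class Args:  # hypothetical stand-in for TrainArguments
    accelerator_config: Optional[Dict] = None

args = Args(accelerator_config={'dispatch_batches': True})
if getattr(args, 'accelerator_config', None) is None:
    args.accelerator_config = {'dispatch_batches': False}
print(args.accelerator_config)  # {'dispatch_batches': True} -- no longer overwritten
```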
2 changes: 2 additions & 0 deletions swift/llm/dataset/preprocessor/core.py
@@ -125,6 +125,8 @@ def rows_to_batched(rows: List[Dict[str, Any]]):
if k not in batched:
batched[k] = [None] * i
batched[k].append(v)
# Make all the lengths of v the same.
batched = {k: v + [None] * (len(rows) - len(v)) for k, v in batched.items()}
return batched

@staticmethod
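To illustrate the fix: when rows have different key sets, a key missing from the trailing rows previously left that column shorter than the others. A self-contained reconstruction of the method under that assumption (the surrounding loop is inferred from the hunk above):

```python
from typing import Any, Dict, List

def rows_to_batched(rows: List[Dict[str, Any]]) -> Dict[str, List[Any]]:
    batched: Dict[str, List[Any]] = {}
    for i, row in enumerate(rows):
        for k, v in row.items():
            if k not in batched:
                batched[k] = [None] * i  # back-fill earlier rows that lacked this key
            batched[k].append(v)
    # Make all the lengths of v the same (the new line in this PR).
    return {k: v + [None] * (len(rows) - len(v)) for k, v in batched.items()}

print(rows_to_batched([{'a': 1, 'b': 2}, {'a': 3}]))
# {'a': [1, 3], 'b': [2, None]} -- without the padding, 'b' would stay at length 1
```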
8 changes: 4 additions & 4 deletions swift/llm/model/model/internlm.py
@@ -232,9 +232,9 @@ def get_model_tokenizer_internvl(model_dir: str,
], ),
],
TemplateType.xcomposer2_5,
get_model_tokenizer_internlm_xcomposer2,
partial(get_model_tokenizer_internlm_xcomposer2, version='v2.5'),
architectures=['InternLMXComposer2ForCausalLM'],
model_arch=ModelArch.internlm_xcomposer,
model_arch=ModelArch.xcomposer,
tags=['vision'],
requires=['decord'],
))
@@ -250,7 +250,7 @@ def get_model_tokenizer_internvl(model_dir: str,
TemplateType.xcomposer2,
get_model_tokenizer_internlm_xcomposer2,
architectures=['InternLMXComposer2ForCausalLM'],
model_arch=ModelArch.internlm_xcomposer,
model_arch=ModelArch.xcomposer,
tags=['vision'],
))

@@ -265,6 +265,6 @@ def get_model_tokenizer_internvl(model_dir: str,
TemplateType.xcomposer2,
partial(get_model_tokenizer_internlm_xcomposer2, version='v2-4khd'),
architectures=['InternLM2ForCausalLM', 'InternLMXComposer2ForCausalLM'],
model_arch=ModelArch.internlm_xcomposer,
model_arch=ModelArch.xcomposer,
tags=['vision'],
))
5 changes: 2 additions & 3 deletions swift/llm/model/model/microsoft.py
@@ -1,4 +1,5 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from functools import partial
from types import MethodType
from typing import Any, Dict

@@ -18,8 +19,6 @@ def get_model_tokenizer_phi3_vision(model_dir: str,
load_model: bool = True,
**kwargs):
processor_kwargs = {}
if 'Phi-3.5-vision-instruct' in model_dir:
kwargs['num_crops'] = kwargs.get('num_crops') or 4
if 'num_crops' in kwargs:
processor_kwargs['num_crops'] = get_env_args('num_crops', int, kwargs['num_crops'])
from transformers import AutoProcessor
@@ -43,7 +42,7 @@ def get_model_tokenizer_phi3_vision(model_dir: str,
])
],
TemplateType.phi3_vision,
get_model_tokenizer_phi3_vision,
partial(get_model_tokenizer_phi3_vision, num_crops=4),
architectures=['Phi3VForCausalLM'],
model_arch=ModelArch.phi3v,
requires=['transformers>=4.36'],
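The registration now bakes `num_crops=4` into the loader with `functools.partial` instead of special-casing the `Phi-3.5-vision-instruct` directory name inside the function body. A small sketch of the pattern, with a hypothetical stand-in loader rather than the real `get_model_tokenizer_phi3_vision`:

```python
from functools import partial

def get_model_tokenizer(model_dir: str, **kwargs):  # hypothetical stand-in loader
    print(f"loading {model_dir} with num_crops={kwargs.get('num_crops')}")

loader = partial(get_model_tokenizer, num_crops=4)
loader('some/phi3.5-vision-dir')                 # num_crops=4 (baked-in default)
loader('some/phi3.5-vision-dir', num_crops=16)   # a call-time keyword overrides the default
```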
4 changes: 2 additions & 2 deletions swift/llm/model/model_arch.py
@@ -39,7 +39,7 @@ class MLLMModelArch:
llava_llama = 'llava_llama'
llava_mistral = 'llava_mistral'

internlm_xcomposer = 'internlm_xcomposer'
xcomposer = 'xcomposer'
internvl = 'internvl'
minicpmv = 'minicpmv'
deepseek_vl = 'deepseek_vl'
@@ -330,7 +330,7 @@ def register_model_arch(model_arch: ModelKeys, *, exist_ok: bool = False) -> Non

register_model_arch(
MultiModelKeys(
MLLMModelArch.internlm_xcomposer,
MLLMModelArch.xcomposer,
language_model='model',
aligner='vision_proj',
vision_tower='vit',
7 changes: 3 additions & 4 deletions swift/llm/template/base.py
@@ -678,7 +678,7 @@ def pre_forward_hook(self, model: nn.Module, args, kwargs):
k: v
for k, v in kwargs.items() if k in {'input_ids', 'labels', 'attention_mask', 'position_ids'}
}
keep_kwargs.update(self._post_encode(model, to_device(kwargs, model.device)))
keep_kwargs.update(to_device(self._post_encode(model, to_device(kwargs, model.device)), model.device))
kwargs = keep_kwargs
if 'inputs_embeds' in kwargs:
kwargs.pop('input_ids', None)
@@ -755,10 +755,9 @@ def _fetch_inputs_startswith(batch: List[Dict[str, Any]], prefix: str) -> List[D
@staticmethod
def fetch_inputs(batch: List[Dict[str, Any]], keys: Optional[List[str]] = None) -> Dict[str, Any]:
from swift.llm import RowPreprocessor
keys = keys or []
rows = RowPreprocessor.rows_to_batched(batch)
if keys is not None:
rows = {k: rows[k] for k in keys}
return rows
return {k: rows[k] for k in keys if rows.get(k) is not None}

@staticmethod
def gather_list(batch: List[Dict[str, Any]], attr_name: str) -> Optional[List[Any]]:
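The rewritten `fetch_inputs` defaults `keys` to an empty list and drops any requested key that is absent from the batched rows. A compact, self-contained illustration, with an inlined stand-in for `RowPreprocessor.rows_to_batched`:

```python
from typing import Any, Dict, List, Optional

def fetch_inputs(batch: List[Dict[str, Any]], keys: Optional[List[str]] = None) -> Dict[str, Any]:
    keys = keys or []
    rows: Dict[str, List[Any]] = {}
    for i, b in enumerate(batch):  # inlined stand-in for rows_to_batched
        for k, v in b.items():
            rows.setdefault(k, [None] * i).append(v)
    rows = {k: v + [None] * (len(batch) - len(v)) for k, v in rows.items()}
    return {k: rows[k] for k in keys if rows.get(k) is not None}

print(fetch_inputs([{'images': ['a.png']}, {'images': ['b.png']}], keys=['images', 'videos']))
# {'images': [['a.png'], ['b.png']]} -- 'videos' is absent, so it is simply dropped
```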
96 changes: 50 additions & 46 deletions swift/llm/template/template/internlm.py
@@ -63,57 +63,61 @@ def _encode(self, inputs: StdTemplateInputs) -> Dict[str, Any]:
return encoded

def _post_encode(self, model, inputs: Dict[str, Any]) -> Dict[str, Any]:
input_ids = inputs['input_ids'][0].tolist()
labels = inputs.get('labels')
images = inputs['images']
if len(images) > 0: # ignore <s>
input_ids = input_ids[1:]
batch_size = len(inputs['input_ids'])
res = []
im_mask = []
length = inputs['length']
for i in range(batch_size):
input_ids = inputs['input_ids'][i].tolist()[:length[i]]
input_ids.append(2) # add dummy </s>
labels = inputs.get('labels')
if labels is not None:
labels = labels[1:]
input_ids.append(2) # add dummy </s>
if labels is not None:
labels = labels[0].tolist()
labels.append(2)
else:
labels = []
res_inputs_embeds = []
res_labels = []
wrap_im_mask = []
pre_i, i, idx = 0, 0, 0
device = model.device
internlm2_model = model.model
if not hasattr(internlm2_model, 'tok_embeddings'):
internlm2_model = internlm2_model.model
tok_embeddings = internlm2_model.tok_embeddings
if len(images) > 0:
images = torch.concat([model.img2emb(image[None])[0] for image in images], dim=0)
while i < len(input_ids):
if input_ids[i] == 2: # replace_token
res_input_ids = torch.tensor([1] + input_ids[pre_i:i], device=device)
res_inputs_embeds.append(tok_embeddings(res_input_ids[None])[0])
wrap_im_mask += [0] * len(res_input_ids)
res_labels += [-100] + labels[pre_i:i]
if len(images) > 0 and idx < images.shape[0]:
res_inputs_embeds.append(images[idx].to(device))
wrap_im_mask += [1] * images.shape[1]
res_labels += [-100] * images.shape[1]
idx += 1
labels = labels[i].tolist()[:length[i]]
labels.append(2)
else:
labels = []
images = inputs['images'][i]
res_inputs_embeds = []
res_labels = []
wrap_im_mask = []
pre_i, i, idx = 0, 0, 0
device = model.device
internlm2_model = model.model
if not hasattr(internlm2_model, 'tok_embeddings'):
internlm2_model = internlm2_model.model
tok_embeddings = internlm2_model.tok_embeddings
if len(images) > 0:
images = torch.concat([model.img2emb(image[None])[0] for image in images], dim=0)
add_bos = False
while i < len(input_ids):
if input_ids[i] == 2: # replace_token
res_input_ids = torch.tensor(([1] if add_bos else []) + input_ids[pre_i:i], device=device)
if not add_bos and self.version != 'v2.5':
add_bos = True
res_inputs_embeds.append(tok_embeddings(res_input_ids[None])[0])
wrap_im_mask += [0] * len(res_input_ids)
res_labels += ([-100] if add_bos else []) + labels[pre_i:i]
if len(images) > 0 and idx < images.shape[0]:
res_inputs_embeds.append(images[idx].to(device))
wrap_im_mask += [1] * images.shape[1]
res_labels += [-100] * images.shape[1]
idx += 1
i += 1
pre_i = i
continue
i += 1
pre_i = i
continue
i += 1
if len(labels) == 0:
res_labels = None
res_inputs_embeds = torch.concat(res_inputs_embeds, dim=0)
wrap_im_mask = torch.tensor(wrap_im_mask, dtype=torch.bool, device=device)[None]
return {'inputs_embeds': res_inputs_embeds, 'im_mask': wrap_im_mask, 'labels': res_labels}
if len(labels) == 0:
res_labels = None
im_mask.append(torch.tensor(wrap_im_mask, dtype=torch.bool, device=device))
res.append({'inputs_embeds': torch.concat(res_inputs_embeds, dim=0), 'labels': res_labels})
res = Template._data_collator(self, res)
res['im_mask'] = self._pad_sequence(im_mask, 0)
return res

def _data_collator(self, batch: List[Dict[str, Any]], *, padding_to: Optional[int] = None) -> Dict[str, Any]:
res = super()._data_collator(batch, padding_to=padding_to)
if 'im_mask' in batch[0]:
im_mask = [b['im_mask'][0] for b in batch]
im_mask = self._pad_sequence(im_mask, 0)
res['im_mask'] = im_mask
res['length'] = [len(b['input_ids']) for b in batch]
res.update(self.fetch_inputs(batch, ['images']))
return res


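The collator now records each sample's original `input_ids` length so that `_post_encode` can strip the padding per sample before building `inputs_embeds`. A toy sketch of that round trip (hypothetical tensors, not real template inputs):

```python
import torch
from torch.nn.utils.rnn import pad_sequence

batch = [{'input_ids': torch.tensor([1, 5, 7])}, {'input_ids': torch.tensor([1, 9])}]
length = [len(b['input_ids']) for b in batch]  # [3, 2], stored by the collator
padded = pad_sequence([b['input_ids'] for b in batch], batch_first=True, padding_value=0)

for i in range(len(batch)):
    input_ids = padded[i].tolist()[:length[i]]  # padding stripped per sample
    print(input_ids)
# [1, 5, 7]
# [1, 9]
```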