
Commit feb172c
1. fix hub ignore-pattern (modelscope#2848)
2. fix llamapro merge
3. fix adalora training
tastelikefeet authored Jan 3, 2025
1 parent a5dca04 commit feb172c
Showing 3 changed files with 11 additions and 6 deletions.
swift/hub/hub.py (2 changes: 1 addition & 1 deletion)
@@ -273,7 +273,7 @@ def push_to_hub(cls,
             token or cls.ms_token,
             private,
             commit_message=commit_message,
-            ignore_patterns=ignore_patterns,
+            ignore_file_pattern=ignore_patterns,
             revision=revision,
             tag=path_in_repo)

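The one-word rename above is the whole hub fix: swift forwards the HF-style keyword `ignore_patterns`, but the ModelScope-side upload call takes the same value under the name `ignore_file_pattern`, so the ignore patterns did not take effect before this change. A minimal sketch of the mapping, assuming a `push_model`-style downstream call (the wrapper name and signature below are illustrative, not the repo's API):

# Sketch only: forwards HF-style ignore patterns under the ModelScope-style name.
# `api.push_model` and its parameters are assumptions for illustration.
def push_folder(api, repo_id: str, folder: str, ignore_patterns=None, **kwargs):
    return api.push_model(
        model_id=repo_id,
        model_dir=folder,
        ignore_file_pattern=ignore_patterns,  # the keyword rename is the actual fix
        **kwargs)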
swift/tuners/llamapro.py (11 changes: 7 additions & 4 deletions)
@@ -49,15 +49,17 @@ def prepare_model(model: nn.Module, config: LLaMAProConfig, adapter_name: str) -
         num_hidden_layers = HfConfigFactory.get_config_attr(model.config, 'num_hidden_layers')
         if num_hidden_layers is None:
             num_hidden_layers = HfConfigFactory.get_config_attr(model.config, 'num_layers')

         assert num_hidden_layers is not None, 'Cannot find num of layers config'
         assert num_hidden_layers % config.num_new_blocks == 0, f'Model layers {num_hidden_layers} ' \
                                                                f'should be divided by {config.num_new_blocks}'
         if config.num_groups is None:
             config.num_groups = config.num_new_blocks

+        # the except block will change the model_type, this will cause `model not found` error
+        # when using internvl
+        origin_model_type = config.model_type
+        model_type = origin_model_type
         num_stride = num_hidden_layers // config.num_groups

         try:
             module_list = LLaMAPro._find_module_list(config, model)
         except AssertionError as e:
@@ -94,21 +96,22 @@ def prepare_model(model: nn.Module, config: LLaMAProConfig, adapter_name: str) -
         LLaMAPro._set_module_list(config, model, new_module_list)

         def state_dict_callback(state_dict, adapter_name, **kwargs):
-            model_key_mapping = LLaMAPro.get_model_key_mapping(config.model_type, config)
+            model_key_mapping = LLaMAPro.get_model_key_mapping(model_type, config)
             new_module_list = [model_key_mapping.module_list + f'.{i}' for i in new_module_idx]
             return {
                 key: value
                 for key, value in state_dict.items() if any([m_part in key for m_part in new_module_list])
             }

         def mark_trainable_callback(model):
-            model_key_mapping = LLaMAPro.get_model_key_mapping(config.model_type, config)
+            model_key_mapping = LLaMAPro.get_model_key_mapping(model_type, config)
             new_module_list = [model_key_mapping.module_list + f'.{i}' for i in new_module_idx]
             for name, parameter in model.named_parameters():
                 parameter: nn.Parameter
                 if any([m_part in name for m_part in new_module_list]):
                     parameter.requires_grad = True

+        config.model_type = origin_model_type
         return SwiftOutput(
             config=config, state_dict_callback=state_dict_callback, mark_trainable_callback=mark_trainable_callback)
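The llamapro change is a save/use/restore of the model type: `config.model_type` is captured as `origin_model_type` before `_find_module_list` runs (its except path can rewrite the field, which caused a `model not found` error with internvl), the local `model_type` copy is used inside the callbacks, and the original value is written back before `SwiftOutput` is returned, so the saved adapter config keeps the user-facing type. A minimal sketch of that pattern with simplified stand-in names (not the repo's API):

# Sketch of the save/use/restore idiom applied in prepare_model.
# `config`, `find_module_list`, `build_callbacks` and `fallback_type` are stand-ins.
def prepare_with_restore(config, find_module_list, build_callbacks, fallback_type):
    origin_model_type = config.model_type   # remember the user-facing value
    model_type = origin_model_type          # local copy used for key mapping
    try:
        module_list = find_module_list(config)
    except AssertionError:
        # the fallback retries under a compatible architecture name; it mutates
        # config.model_type, which previously leaked into the saved config
        config.model_type = fallback_type
        module_list = find_module_list(config)
    callbacks = build_callbacks(model_type, module_list)
    config.model_type = origin_model_type   # restore before returning
    return callbacks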
swift/tuners/peft.py (4 changes: 3 additions & 1 deletion)
@@ -19,6 +19,7 @@
                   get_peft_model, get_peft_model_state_dict)
 from peft.config import PeftConfigMixin
 from peft.tuners import lora
+from peft.tuners.adalora import AdaLoraModel, RankAllocator
 from peft.tuners.lora import Embedding
 from transformers import Trainer
@@ -283,6 +284,8 @@ def hot_patch_peft_module():
     # Fix Lora does not support NonDynamicallyQuantizableLinear
     LoraModel._create_and_replace_origin = LoraModel._create_and_replace
     LoraModel._create_and_replace = _create_and_replace_hook
+    AdaLoraModel._create_and_replace_origin = AdaLoraModel._create_and_replace
+    AdaLoraModel._create_and_replace = _create_and_replace_hook
     VeraModel._create_and_replace_origin = VeraModel._create_and_replace
     VeraModel._create_and_replace = _create_and_replace_hook
     BOFTModel._create_and_replace_origin = BOFTModel._create_and_replace
@@ -328,7 +331,6 @@ def dummy_function(*args, **kwargs):
     PeftModel.set_active_adapters = partial(dummy_function, func='set_active_adapters')

     # Fix adalora does not support device_map
-    from peft.tuners.adalora import AdaLoraModel, RankAllocator
     AdaLoraModel.forward = adalora_forward
     RankAllocator.mask_to_budget = adalora_mask_to_budget

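The peft change hoists the adalora import to module level and adds `AdaLoraModel` to the same hot patch already applied to `LoraModel`, `VeraModel`, and `BOFTModel`: the original `_create_and_replace` is preserved as `_create_and_replace_origin` and the shared hook is installed in its place, which is the part of the commit that addresses AdaLoRA training. A minimal sketch of that class-level patching idiom, with a generic `hook` standing in for the repo's `_create_and_replace_hook`:

# Sketch of the monkey-patch idiom used in hot_patch_peft_module.
# `hook` stands in for _create_and_replace_hook; any tuner class exposing
# _create_and_replace can be wired up the same way.
def patch_create_and_replace(tuner_cls, hook):
    if getattr(tuner_cls, '_create_and_replace_origin', None) is None:
        # keep the original so the hook can still delegate to it
        tuner_cls._create_and_replace_origin = tuner_cls._create_and_replace
        tuner_cls._create_and_replace = hook

# e.g. patch_create_and_replace(AdaLoraModel, _create_and_replace_hook)

The guard makes the patch idempotent, so running the hot-patch routine twice does not overwrite `_create_and_replace_origin` with the already-patched hook.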
