VeRA-PiSSA method added #8722

Merged
merged 11 commits
Jul 23, 2024
3 changes: 1 addition & 2 deletions llm/tools/merge_vera_params.py
@@ -76,8 +76,7 @@ def merge():
) and args.device == "cpu":
raise ValueError("We cannot apply bfloat16 or nf4/fp4 vera merge on cpu.")

vera_config.merge_weights = False
# with device_guard(): 会导致svd无法进行分解
# with device_guard() will cause SVD decomposition to fail
model = AutoModelForCausalLM.from_pretrained(
vera_config.base_model_name_or_path,
config=config,
1 change: 0 additions & 1 deletion paddlenlp/peft/vera/vera_layers.py
@@ -48,7 +48,6 @@
self.merged = False

if pissa_init:
assert self.vera_alpha == self.r, "pissa method requires vera_alpha=r, scaling=1"
Contributor

Why was this assertion removed?

Contributor Author

It has been added back to increase code coverage, together with a corresponding exception test.
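A minimal sketch of the restored precondition check described above, assuming a unittest-style exception test; the class, test names, and values below are illustrative, not the exact test added in this PR:

```python
# Hypothetical exception test for the restored PiSSA precondition (vera_alpha == r).
import unittest

import paddle.nn as nn
from paddlenlp.peft.vera.vera_layers import VeRALinear


class TestPissaPrecondition(unittest.TestCase):
    def test_pissa_requires_vera_alpha_equal_r(self):
        # PiSSA initialization assumes scaling == 1, i.e. vera_alpha == r,
        # so a mismatch should trip the restored assertion.
        with self.assertRaises(AssertionError):
            VeRALinear(
                in_features=16,
                out_features=16,
                r=4,
                vera_alpha=8,  # != r, violates the PiSSA precondition
                base_linear_module=nn.Linear(16, 16),
                pissa_init=True,
            )


if __name__ == "__main__":
    unittest.main()
```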

self.scaling = 1.0
self.vera_A = self.create_parameter(
shape=[in_features, r],
@@ -102,7 +101,7 @@
dtype = weight.dtype

if dtype != paddle.float32:
weight = weight.astype(paddle.float32)

Codecov warning: added line #L104 in paddlenlp/peft/vera/vera_layers.py was not covered by tests.

U, S, Vh = paddle.linalg.svd(weight.data, full_matrices=False)
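For context, PiSSA-style initialization typically splits an SVD like the one above into principal low-rank factors plus a residual. A hedged, standalone sketch of that generic construction follows; the hidden lines of this hunk may assign things differently.

```python
# Hedged sketch of a PiSSA-style split around the SVD: principal factors + residual.
import paddle

r = 4
weight = paddle.randn([16, 16], dtype="float32")

U, S, Vh = paddle.linalg.svd(weight, full_matrices=False)
Ur, Sr, Vhr = U[:, :r], S[:r], Vh[:r, :]

vera_A = Ur @ paddle.diag(paddle.sqrt(Sr))   # principal factor, shape [in_features, r]
vera_B = paddle.diag(paddle.sqrt(Sr)) @ Vhr  # principal factor, shape [r, out_features]
residual = weight - vera_A @ vera_B          # replaces the frozen base weight

# With vera_alpha == r (scaling == 1), A @ B + residual reproduces the original weight.
assert paddle.allclose(vera_A @ vera_B + residual, weight, atol=1e-4).item()
```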

@@ -120,20 +119,20 @@
self.weight.set_value(weight)

def merge(self):
if not self.merged:
diag_b = paddle.diag(self.vera_b)
diag_d = paddle.diag(self.vera_d)
new_weight = self.weight + self.vera_A @ diag_d @ self.vera_B @ diag_b * self.scaling
self.weight.set_value(new_weight)
self.merged = True

Codecov warning: added lines #L122-L127 in paddlenlp/peft/vera/vera_layers.py were not covered by tests.

def unmerge(self):
if self.merged:
diag_b = paddle.diag(self.vera_b)
diag_d = paddle.diag(self.vera_d)
new_weight = self.weight - self.vera_A @ diag_d @ self.vera_B @ diag_b * self.scaling
self.weight.set_value(new_weight)
self.merged = False

Codecov warning: added lines #L130-L135 in paddlenlp/peft/vera/vera_layers.py were not covered by tests.

def forward(self, input: paddle.Tensor, *args, **kwargs):
result = F.linear(x=input, weight=self.weight, bias=self.bias, name=self.name)
@@ -145,5 +144,5 @@
return result

def extra_repr(self):
name = f", name={self.name}" if self.name else ""
return f"in_features={self.weight.shape[0]}, out_features={self.weight.shape[1]}, rank={self.r}{name}"

Codecov warning: added lines #L147-L148 in paddlenlp/peft/vera/vera_layers.py were not covered by tests.
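The merge/unmerge pair above adds and then subtracts the same low-rank update, W' = W + vera_A @ diag(vera_d) @ vera_B @ diag(vera_b) * scaling. A standalone numerical check with illustrative shapes:

```python
# Standalone check of the update used by merge()/unmerge().
import paddle

in_features, out_features, r, scaling = 16, 16, 4, 1.0
weight = paddle.randn([in_features, out_features])
vera_A = paddle.randn([in_features, r])
vera_B = paddle.randn([r, out_features])
vera_d = paddle.randn([r])             # per-rank scaling vector
vera_b = paddle.randn([out_features])  # per-output scaling vector

delta = vera_A @ paddle.diag(vera_d) @ vera_B @ paddle.diag(vera_b) * scaling
merged = weight + delta    # what merge() writes back via set_value()
restored = merged - delta  # what unmerge() writes back

assert paddle.allclose(restored, weight, atol=1e-5).item()
```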
8 changes: 0 additions & 8 deletions paddlenlp/peft/vera/vera_model.py
@@ -24,7 +24,6 @@
from paddle.distributed.fleet.meta_parallel import PipelineLayer

from ...transformers.model_utils import PretrainedModel, _add_variant, dtype_guard
from ...transformers.utils import weight_name_suffix
from ...utils.env import VERA_WEIGHTS_NAME
from ...utils.log import logger
from .vera_config import VeRAConfig
@@ -46,9 +45,9 @@
self.model = self.get_vera_model(model, vera_config)
self.is_pipelinemodel = False
if issubclass(type(self.model), PipelineLayer):
raise NotImplementedError("vera does not support pipeline parallel now")

Codecov warning: added line #L48 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
if vera_config.tensor_parallel_degree > 1:
raise NotImplementedError("vera does not support tensor parallel now")

Codecov warning: added line #L50 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
self.forward = self.model.forward

@classmethod
@@ -77,14 +76,14 @@
vera_config_tensor_parallel_degree > 1
and vera_config_tensor_parallel_degree != model.config.tensor_parallel_degree
):
raise NotImplementedError(

Codecov warning: added line #L79 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
f"{vera_config_tensor_parallel_degree} is not equal to {model.config.tensor_parallel_degree}. Please merge VeRA weights first."
)

# set vera state dict
vera_model.set_state_dict(vera_state_dict)
else:
logger.error(f"VeRA weights not found under {vera_path}, creating VeRA weights from scratch")

Codecov warning: added line #L86 in paddlenlp/peft/vera/vera_model.py was not covered by tests.

return vera_model

@@ -103,15 +102,15 @@
save_model_config = kwargs.get("save_model_config", True)

if self.is_pipelinemodel:
self.model._single_to_pp_mapping = None

Codecov warning: added line #L105 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
if self.quantized and merge_tensor_parallel and self.vera_config.tensor_parallel_degree > 1:
merge_tensor_parallel = False
logger.warning(

Codecov warning: added lines #L107-L108 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
"Quantized strategy does not support merge_tensor_parallel. Set merge_tensor_parallel to False."
)
if self.is_pipelinemodel and merge_tensor_parallel and self.vera_config.tensor_parallel_degree > 1:
merge_tensor_parallel = False
logger.warning(

Codecov warning: added lines #L112-L113 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
"Pipeline parallism does not support merge_tensor_parallel. Set merge_tensor_parallel to False."
)

@@ -128,9 +127,6 @@
logger.info(f"vera config to save is {vera_config_to_save}")

trainable_state_dict = self.get_trainable_state_dict()
if vera_config_to_save.tensor_parallel_degree > 1:
if variant is None:
variant = weight_name_suffix()

# save vera weight
vera_weight_name = _add_variant(VERA_WEIGHTS_NAME, variant)
@@ -143,7 +139,7 @@
if save_model_config:
model_config_to_save = copy.deepcopy(self.model.config)
if merge_tensor_parallel:
model_config_to_save.tensor_parallel_degree = -1

Codecov warning: added line #L142 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
model_config_to_save.save_pretrained(save_directory)

def _find_and_replace_module(self, model, module_name, vera_config, enable_vera):
@@ -178,17 +174,17 @@
setattr(parent_module, attribute_chain[-1], vera_module)

def _find_and_restore_module(self, module_name):
parent_module = self.model
attribute_chain = module_name.split(".")
for name in attribute_chain[:-1]:
parent_module = getattr(parent_module, name)
module = getattr(parent_module, attribute_chain[-1])
original_model_class = self.restore_layer_map[module.__class__]
original_module = original_model_class(in_features=module.weight.shape[0], out_features=module.weight.shape[1])
original_module.weight = module.weight
if module.bias is not None:
original_module.bias = module.bias
setattr(parent_module, attribute_chain[-1], original_module)

Codecov warning: added lines #L177-L187 in paddlenlp/peft/vera/vera_model.py were not covered by tests.

def get_trainable_state_dict(self):
trainable_state_dict = OrderedDict()
@@ -199,14 +195,14 @@
return trainable_state_dict

def print_trainable_parameters(self) -> None:
freeze_numel = 0
trainable_numel = 0
for _, weight in self.model.state_dict().items():
if weight.stop_gradient:
freeze_numel += np.prod(weight.shape)

Codecov warning: added lines #L198-L202 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
else:
trainable_numel += np.prod(weight.shape)
logger.debug(

Codecov warning: added lines #L204-L205 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
f"Frozen parameters: {freeze_numel:.2e} || Trainable parameters:{trainable_numel:.2e} || Total parameters:{freeze_numel+trainable_numel:.2e}|| Trainable:{trainable_numel / (freeze_numel+trainable_numel):.2%}"
)

@@ -215,14 +211,14 @@
if isinstance(layer, VeRALinear):
for name, weight in layer.state_dict().items():
if self.vera_config.trainable_bias in ["vera", "all"] and "bias" in name:
weight.stop_gradient = False

Codecov warning: added line #L214 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
elif "vera" in name:
# notfreezeB=True, vera_b, vera_d, vera_B is trainable
# notfreezeB=False, vera_b, vera_d is trainable
if "vera_b" in name or "vera_d" in name:
weight.stop_gradient = False
elif "vera_B" in name and notfreezeB:
weight.stop_gradient = False

Codecov warning: added line #L221 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
else:
weight.stop_gradient = True
else:
@@ -230,26 +226,26 @@
else:
for name, weight in layer.state_dict().items():
if self.vera_config.trainable_bias == "all" and "bias" in name:
weight.stop_gradient = False

Codecov warning: added line #L229 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
else:
weight.stop_gradient = True
if self.vera_config.trainable_modules is not None:
for name, weight in self.model.state_dict().items():
if any(

Codecov warning: added lines #L233-L234 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
re.fullmatch(trainable_module, name) for trainable_module in self.vera_config.trainable_modules
):
weight.stop_gradient = False

Codecov warning: added line #L237 in paddlenlp/peft/vera/vera_model.py was not covered by tests.

def get_vera_model(self, model: Union[PretrainedModel, nn.Layer], vera_config: VeRAConfig):

if vera_config.target_modules is None:
return model

Codecov warning: added line #L242 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
elif isinstance(vera_config.target_modules, str):
target_modules = [vera_config.target_modules]
if vera_config.enable_vera_list is None:
enable_vera_list = [vera_config.enable_vera_list]

Codecov warning: added lines #L244-L246 in paddlenlp/peft/vera/vera_model.py were not covered by tests.
Contributor

enable_vera_list looks like it was copied straight from LoRA; VeRA has no corresponding feature. Please remove everything related to enable_vera_list and just take the None branch in the code.

Contributor Author

done

Contributor

enable_vera_list should be removed entirely at the vera_config level, since we do not need this parameter. It looks like the current code still keeps it?
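For reference, the selection logic around this thread boils down to regex-matching sublayer names against target_modules. A simplified sketch of that step; the helper name here is illustrative, not part of the PR:

```python
# Simplified sketch of selecting layers to wrap, based on target_modules regex patterns.
import re


def match_target_modules(model, target_modules):
    """Yield sublayer names whose full name matches any target_modules pattern."""
    if isinstance(target_modules, str):
        target_modules = [target_modules]
    for layer_name, _ in model.named_sublayers():
        if any(re.fullmatch(pattern, layer_name) for pattern in target_modules):
            yield layer_name


# e.g. patterns such as [".*q_proj.*", ".*v_proj.*"], as used in the tests below
```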

else:
raise TypeError(

Codecov warning: added line #L248 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
f"Invalid `enable_vera_list` value: {vera_config.enable_vera_list}. Since `target_modules` is `str`, `enable_vera_list` must be `None` or `List[bool]`"
)
else:
@@ -257,7 +253,7 @@
if vera_config.enable_vera_list is None:
enable_vera_list = [None for _ in range(len(target_modules))]
else:
raise TypeError(

Codecov warning: added line #L256 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
f"Invalid `enable_vera_list` value: {vera_config.enable_vera_list}. Since `target_modules` is `List[str]`, `enable_vera_list` must be `None` or `List[Optional[List[bool]]]`"
)

@@ -269,23 +265,19 @@
return model

def restore_original_model(self):
# make sure W and vera weights are not merged before we restore the original model
if self.vera_config.merge_weights:
self.train()

for layer_name, layer in self.model.named_sublayers():
if isinstance(layer, VeRALinear):
self._find_and_restore_module(layer_name)

Codecov warning: added line #L270 in paddlenlp/peft/vera/vera_model.py was not covered by tests.
else:
raise NotImplementedError(f"{layer} restoration is not supported yet.")
return self.model

Codecov warning: added line #L273 in paddlenlp/peft/vera/vera_model.py was not covered by tests.

def __getattr__(self, name: str):
"""Forward missing attributes to the wrapped module."""
try:
return super().__getattr__(name) # defer to nn.Layer's logic
except AttributeError:
return getattr(self.model, name)

Codecov warning: added lines #L279-L280 in paddlenlp/peft/vera/vera_model.py were not covered by tests.

def train(self):
self.training = True
15 changes: 15 additions & 0 deletions test_file.txt/vera_config.json
@@ -0,0 +1,15 @@
{
"base_model_name_or_path": null,
Contributor

What is this file for?

Contributor Author

It was only for testing and has been deleted, done

Contributor Author

enable_vera_list has now been removed entirely at the vera_config level

"do_qat": false,
"dtype": null,
"enable_vera_list": null,
"head_dim": null,
"pissa_init": false,
"r": 8,
"target_modules": null,
"tensor_parallel_degree": -1,
"trainable_bias": null,
"trainable_modules": null,
"vera_alpha": 8,
"vera_dropout": 0.0
}
37 changes: 31 additions & 6 deletions tests/peft/test_vera.py
@@ -16,7 +16,7 @@
import os
import re
import unittest
from tempfile import TemporaryDirectory
from tempfile import NamedTemporaryFile, TemporaryDirectory

import numpy as np
import paddle
@@ -41,7 +41,13 @@ def test_r_raise_exception(self):

def test_forward(self):
vera_layer = VeRALinear(
in_features=16, out_features=16, r=4, vera_dropout=0.1, vera_alpha=4, base_linear_module=nn.Linear(16, 16)
in_features=16,
out_features=16,
r=4,
vera_dropout=0.1,
vera_alpha=4,
base_linear_module=nn.Linear(16, 16),
pissa_init=True,
)
x = paddle.randn([2, 4, 16], "float32")
output = vera_layer(x)
@@ -104,10 +110,7 @@ class TestVeraModel(unittest.TestCase):
@parameterized.expand([(None,), ("all",), ("vera",)])
def test_vera_model_constructor(self, bias):
vera_config = VeRAConfig(
target_modules=[".*q_proj.*", ".*v_proj.*"],
r=4,
vera_alpha=4,
head_dim=2,
target_modules=[".*q_proj.*", ".*v_proj.*"], r=4, vera_alpha=4, head_dim=2, pissa_init=True
)
# turn off plm dropout to compare train vs test behavior
model = AutoModel.from_pretrained(
@@ -156,6 +159,17 @@ def test_vera_model_save_load(self):
config_loaded_results = config_loaded_vera_model(input_ids)
self.assertTrue(paddle.allclose(original_results[0], config_loaded_results[0]))

def test_restore_original_model(self):
vera_config = VeRAConfig(
target_modules=[".*q_proj.*", ".*v_proj.*"],
r=4,
vera_alpha=4,
)
model = AutoModel.from_pretrained("__internal_testing__/tiny-random-bert")
vera_model = VeRAModel(model, vera_config)
with self.assertRaises(NotImplementedError):
vera_model.restore_original_model()

def test_vera_module_raise_exception(self):
vera_config = VeRAConfig(
target_modules=[".*norm1.*"],
@@ -176,6 +190,17 @@ def test_save_load(self):
loaded_vera_config = VeRAConfig.from_pretrained(tempdir)
self.assertEqual(vera_config, loaded_vera_config)

def test_save_load_err(self):
with NamedTemporaryFile("w+t") as f:
with self.assertRaises(ValueError):
VeRAConfig.from_pretrained(f.name)

def test_save_pretrained_file_error(self):
with NamedTemporaryFile("w+t") as f:
vera_config = VeRAConfig()
with self.assertRaises(AssertionError):
vera_config.save_pretrained(f.name)


if __name__ == "__main__":
unittest.main()
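Putting the pieces together, a hedged end-to-end sketch that mirrors the tests above; the import paths and the tiny test model name are taken from the test file and assumed to be publicly importable, and the checkpoint directory is illustrative:

```python
# End-to-end sketch: build a VeRA-PiSSA model, run it, and save the adapter weights.
import paddle
from paddlenlp.peft import VeRAConfig, VeRAModel  # assumed public import path
from paddlenlp.transformers import AutoModel

vera_config = VeRAConfig(
    target_modules=[".*q_proj.*", ".*v_proj.*"],
    r=4,
    vera_alpha=4,  # PiSSA assumes vera_alpha == r, i.e. scaling == 1
    pissa_init=True,
)
model = AutoModel.from_pretrained("__internal_testing__/tiny-random-bert")
vera_model = VeRAModel(model, vera_config)
vera_model.print_trainable_parameters()

input_ids = paddle.to_tensor([[1, 2, 3, 4]])
outputs = vera_model(input_ids)  # forwards to the wrapped model

vera_model.save_pretrained("./vera_checkpoint")  # writes VeRA weights and vera_config
```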