|
26 | 26 | import paddle
|
27 | 27 | import paddle.distributed as dist
|
28 | 28 | from paddle.distributed import fleet
|
| 29 | +from paddle import in_dynamic_mode |
29 | 30 |
|
30 | 31 | from paddlenlp.ops import Topology
|
31 | 32 | from paddlenlp.trainer import (
|
@@ -450,17 +451,12 @@ def main():
|
450 | 451 | model_args, data_args, training_args = parser.parse_args_into_dataclasses()
|
451 | 452 |
|
452 | 453 | if training_args.enable_linear_fused_grad_add:
|
453 |
| - from fused_layers import mock_layers |
454 |
| - |
455 |
| - mock_layers() |
456 |
| - |
457 |
| - if "replace_with_parallel_cross_entropy" in training_args.tensor_parallel_config: |
458 |
| - utils_path = "/workspace/PaddleNLP/llm/utils" |
| 454 | + utils_path = "/root/paddlejob/workspace/env_run/shenfangjian/PaddleNLP/llm/utils" |
459 | 455 | sys.path.append(utils_path)
|
460 | 456 |
|
461 |
| - from replace_ops import replace_cross_entropy |
| 457 | + from fused_layers import mock_layers |
462 | 458 |
|
463 |
| - replace_cross_entropy() |
| 459 | + mock_layers() |
464 | 460 |
|
465 | 461 | if model_args.tokenizer_name_or_path is None:
|
466 | 462 | model_args.tokenizer_name_or_path = model_args.model_name_or_path
|
@@ -564,6 +560,14 @@ def main():
|
564 | 560 |
|
565 | 561 | print("Final pre-training config:", config)
|
566 | 562 |
|
| 563 | + if "replace_with_parallel_cross_entropy" in training_args.tensor_parallel_config and config.tensor_parallel_degree > 1 and in_dynamic_mode(): |
| 564 | + utils_path = "/root/paddlejob/workspace/env_run/shenfangjian/PaddleNLP/llm/utils" |
| 565 | + sys.path.append(utils_path) |
| 566 | + |
| 567 | + from replace_ops import replace_cross_entropy |
| 568 | + |
| 569 | + replace_cross_entropy() |
| 570 | + |
567 | 571 | # # Set the dtype for loading model
|
568 | 572 | # dtype = "float32"
|
569 | 573 | # if training_args.fp16_opt_level == "O2":
|
|
0 commit comments