
Commit ddef3ff

remove dsv2

1 parent 769718c · commit ddef3ff

File tree: 15 files changed (+2415 / -3274 lines)


examples/experiments/deepseek_v3_pretrain/modeling.py

Lines changed: 4 additions & 4 deletions

@@ -71,13 +71,13 @@
     StateDictNameMapping,
     init_name_mappings,
 )
-from paddleformers.transformers.deepseek_v2 import fp8_linear as linear_utils
-from paddleformers.transformers.deepseek_v2 import (
+from paddleformers.transformers.deepseek_v3 import fp8_linear as linear_utils
+from paddleformers.transformers.deepseek_v3 import (
     rotate_half,
     scaled_dot_product_attention,
     yarn_get_mscale,
 )
-from paddleformers.transformers.deepseek_v2.fp8_linear import Linear as Linear_
+from paddleformers.transformers.deepseek_v3.fp8_linear import Linear as Linear_
 from paddleformers.transformers.fp8_utils import (
     FP8KeepXLinear,
     FP8Linear,
@@ -1221,7 +1221,7 @@ class DeepseekV2PretrainedModelFast(PretrainedModel):
     _no_split_modules = ["DeepseekV2DecoderLayer"]

     def _get_model_flops(self, batch_size=1, seq_length=None, **kwargs):
-        from paddleformers.transformers.deepseek_v2.mfu_utils import DeepSeekProjection
+        from paddleformers.transformers.deepseek_v3.mfu_utils import DeepSeekProjection

         # self._
         mfu_cal_proj = DeepSeekProjection(self.config)

examples/experiments/deepseek_v3_pretrain/run_pretrain.py

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@
     LinearAnnealingWithWarmupDecay,
 )
 from paddleformers.transformers.configuration_utils import LlmMetaConfig, llmmetaclass
-from paddleformers.transformers.deepseek_v2 import DeepseekV2ForCausalLM
+from paddleformers.transformers.deepseek_v3 import DeepseekV2ForCausalLM
 from paddleformers.utils.batch_sampler import DistributedBatchSampler
 from paddleformers.utils.log import logger
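For code outside this repository that still imports the model from the removed deepseek_v2 package, a minimal migration shim along these lines should work. It is a sketch, not part of the commit, and assumes only what the "+" line above shows: that DeepseekV2ForCausalLM is importable from paddleformers.transformers.deepseek_v3 after this commit.

# Hedged migration sketch (not part of the commit): fall back to the new
# deepseek_v3 import path once the deepseek_v2 package is gone.
try:
    from paddleformers.transformers.deepseek_v2 import DeepseekV2ForCausalLM  # pre-ddef3ff path
except ImportError:
    from paddleformers.transformers.deepseek_v3 import DeepseekV2ForCausalLM  # path after this commit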

paddleformers/cli/train/pretrain/workflow.py

Lines changed: 2 additions & 2 deletions

@@ -53,7 +53,7 @@
     LinearAnnealingWithWarmupDecay,
 )
 from paddleformers.transformers.configuration_utils import LlmMetaConfig, llmmetaclass
-from paddleformers.transformers.deepseek_v2 import DeepseekV2ForCausalLM
+from paddleformers.transformers.deepseek_v3 import DeepseekV3ForCausalLM
 from paddleformers.utils.batch_sampler import DistributedBatchSampler
 from paddleformers.utils.log import logger

@@ -485,7 +485,7 @@ def run_dsv3_pretrain(model_args, data_args, generating_args, training_args):
     if training_args.bf16:
         dtype = "bfloat16"

-    model_class = DeepseekV2ForCausalLM
+    model_class = DeepseekV3ForCausalLM
     if training_args.pipeline_parallel_degree > 1:
         model_class = DeepseekV2ForCausalLMPipe
     if "LLama" in str(config.architectures):

paddleformers/transformers/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -366,7 +366,6 @@
 from .auto.processing import *
 from .auto.tokenizer import *
 from .auto.video_processing import *
-from .deepseek_v2 import *
 from .deepseek_v3 import *
 from .ernie4_5 import *
 from .ernie4_5_moe import *

paddleformers/transformers/auto/modeling.py

Lines changed: 0 additions & 1 deletion

@@ -53,7 +53,6 @@
 MAPPING_NAMES = OrderedDict(
     [
         ("Bert", "bert"),
-        ("DeepseekV2", "deepseek_v2"),
         ("DeepseekV3", "deepseek_v3"),
         ("Ernie4_5", "ernie4_5"),
         ("Ernie4_5_Moe", "ernie4_5_moe"),

paddleformers/transformers/deepseek_v2/__init__.py

Lines changed: 0 additions & 90 deletions
This file was deleted.

paddleformers/transformers/deepseek_v2/configuration.py

Lines changed: 0 additions & 237 deletions
This file was deleted.
