1 file changed: 2 additions, 2 deletions
examples/experiments/deepseek_v3_pretrain (1 file changed: 2 additions, 2 deletions)

Original file line number | Diff line number | Diff line change
48 | 48 |       LinearAnnealingWithWarmupDecay,
49 | 49 |   )
50 | 50 |   from paddleformers.transformers.configuration_utils import LlmMetaConfig, llmmetaclass
51 |    | - from paddleformers.transformers.deepseek_v3 import DeepseekV2ForCausalLM
   | 51 | + from paddleformers.transformers.deepseek_v3 import DeepseekV3ForCausalLM
52 | 52 |   from paddleformers.utils.batch_sampler import DistributedBatchSampler
53 | 53 |   from paddleformers.utils.log import logger
54 | 54 |
@@ -480,7 +480,7 @@ def main():
480 | 480 |       if training_args.bf16:
481 | 481 |           dtype = "bfloat16"
482 | 482 |
483 |     | -     model_class = DeepseekV2ForCausalLM
    | 483 | +     model_class = DeepseekV3ForCausalLM
484 | 484 |       if training_args.pipeline_parallel_degree > 1:
485 | 485 |           model_class = DeepseekV2ForCausalLMPipe
486 | 486 |           if "LLama" in str(config.architectures):
You can’t perform that action at this time.
0 commit comments