Support Llama3.1-nemotron-70b-inst-hf #2299

Merged · 11 commits · Oct 21, 2024
1 change: 1 addition & 0 deletions .gitignore
@@ -133,6 +133,7 @@ benchmarks/
eval_outputs/
transformers/
vlmeval/
my_model/
Collaborator: Please try not to change this file

Collaborator Author: I'm still not very good with git, sorry


# Pytorch
*.pth
1 change: 1 addition & 0 deletions docs/source/Instruction/支持的模型和数据集.md
@@ -199,6 +199,7 @@
|llama3_1-405b-instruct-awq|[LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|✘|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4)|
|llama3_1-405b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|✘|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4)|
|llama3_1-405b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|✘|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4)|
|llama-3.1-nemotron-70B-instruct-hf|[AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF](https://modelscope.cn/models/AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|✘|transformers>=4.43|-|[nvidia/Llama-3.1-Nemotron-70B-Instruct-HF](https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF)|
|llama3_2-1b|[LLM-Research/Llama-3.2-1B](https://modelscope.cn/models/LLM-Research/Llama-3.2-1B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B)|
|llama3_2-1b-instruct|[LLM-Research/Llama-3.2-1B-Instruct](https://modelscope.cn/models/LLM-Research/Llama-3.2-1B-Instruct/summary)|q_proj, k_proj, v_proj|llama3_2|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)|
|llama3_2-3b|[LLM-Research/Llama-3.2-3B](https://modelscope.cn/models/LLM-Research/Llama-3.2-3B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-3B](https://huggingface.co/meta-llama/Llama-3.2-3B)|
1 change: 1 addition & 0 deletions docs/source_en/Instruction/Supported-models-datasets.md
@@ -199,6 +199,7 @@ The table below introduces all models supported by SWIFT:
|llama3_1-405b-instruct-awq|[LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-AWQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|✘|transformers>=4.43, autoawq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4)|
|llama3_1-405b-instruct-gptq-int4|[LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|✘|transformers>=4.43, auto_gptq|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-GPTQ-INT4)|
|llama3_1-405b-instruct-bnb|[LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://modelscope.cn/models/LLM-Research/Meta-Llama-3.1-405B-Instruct-BNB-NF4/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✘|✘|transformers>=4.43, bitsandbytes|-|[hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-405B-Instruct-BNB-NF4)|
|llama-3.1-nemotron-70B-instruct-hf|[AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF](https://modelscope.cn/models/AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF/summary)|q_proj, k_proj, v_proj|llama3|✔|✔|✔|✘|transformers>=4.43|-|[nvidia/Llama-3.1-Nemotron-70B-Instruct-HF](https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF)|
|llama3_2-1b|[LLM-Research/Llama-3.2-1B](https://modelscope.cn/models/LLM-Research/Llama-3.2-1B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B)|
|llama3_2-1b-instruct|[LLM-Research/Llama-3.2-1B-Instruct](https://modelscope.cn/models/LLM-Research/Llama-3.2-1B-Instruct/summary)|q_proj, k_proj, v_proj|llama3_2|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct)|
|llama3_2-3b|[LLM-Research/Llama-3.2-3B](https://modelscope.cn/models/LLM-Research/Llama-3.2-3B/summary)|q_proj, k_proj, v_proj|default-generation|✔|✔|✔|✘|transformers>=4.45|-|[meta-llama/Llama-3.2-3B](https://huggingface.co/meta-llama/Llama-3.2-3B)|
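The rows added above expose the new checkpoint under the model-type key `llama-3.1-nemotron-70B-instruct-hf` with the `llama3` template. As a rough illustration of how that key is meant to be consumed, here is a minimal fine-tuning sketch, assuming the `SftArguments`/`sft_main` helpers from `swift.llm` accept this entry the same way as the other llama3 models; the dataset name and hyperparameters are placeholders only:

```python
# Hypothetical sketch: fine-tune the newly registered model type with SWIFT.
# Assumes swift.llm exposes SftArguments/sft_main as in the repo's examples;
# the dataset and hyperparameters below are placeholders, not recommendations.
from swift.llm import ModelType, SftArguments, sft_main

sft_args = SftArguments(
    model_type=ModelType.llama3_1_nemotron_70B_instruct_hf,  # 'llama-3.1-nemotron-70B-instruct-hf'
    dataset=['alpaca-zh'],   # placeholder dataset
    max_length=2048,
    output_dir='output',
)
result = sft_main(sft_args)
```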
13 changes: 13 additions & 0 deletions swift/llm/utils/model.py
@@ -261,6 +261,8 @@ class ModelType:
llama3_1_405b_instruct_awq = 'llama3_1-405b-instruct-awq'
llama3_1_405b_instruct_gptq_int4 = 'llama3_1-405b-instruct-gptq-int4'
llama3_1_405b_instruct_bnb = 'llama3_1-405b-instruct-bnb'
# llama3.1-nemotron
llama3_1_nemotron_70B_instruct_hf = 'llama-3.1-nemotron-70B-instruct-hf'
# llama3.2
llama3_2_1b = 'llama3_2-1b'
llama3_2_1b_instruct = 'llama3_2-1b-instruct'
@@ -4892,6 +4894,17 @@ def get_model_tokenizer_deepseek_vl(model_dir: str,
return model, tokenizer


@register_model(
ModelType.llama3_1_nemotron_70B_instruct_hf,
'AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF',
LoRATM.llama,
TemplateType.llama3,
support_flash_attn=True,
support_vllm=True,
support_lmdeploy=True,
requires=['transformers>=4.43'],
ignore_file_pattern=[r'.+\.pth$'],
hf_model_id='nvidia/Llama-3.1-Nemotron-70B-Instruct-HF')
@register_model(
ModelType.openbuddy_llama3_1_8b_chat,
'OpenBuddy/openbuddy-llama3.1-8b-v22.1-131k',
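The `@register_model` entry above ties the new `ModelType` constant to the ModelScope repo `AI-ModelScope/Llama-3.1-Nemotron-70B-Instruct-HF`, the `llama3` chat template, flash-attention/vLLM/LMDeploy support, and an ignore pattern for stray `.pth` files. For completeness, a minimal inference sketch, assuming the `get_model_tokenizer`/`get_template`/`inference` helpers from `swift.llm` behave for this entry as they do for the other llama3 registrations; the query is a placeholder, and a 70B model will need several GPUs or offloading:

```python
# Hypothetical sketch: load and query the newly registered model via swift.llm.
# Assumes get_default_template_type / get_model_tokenizer / get_template / inference
# exist with the signatures used elsewhere in this repo; not verified here.
import torch
from swift.llm import (ModelType, get_default_template_type,
                       get_model_tokenizer, get_template, inference)

model_type = ModelType.llama3_1_nemotron_70B_instruct_hf
template_type = get_default_template_type(model_type)  # expected to resolve to 'llama3'

model, tokenizer = get_model_tokenizer(
    model_type,
    torch.bfloat16,
    model_kwargs={'device_map': 'auto'},  # 70B weights: multiple GPUs or offloading required
)
template = get_template(template_type, tokenizer)

response, history = inference(model, template, 'Hello, who are you?')  # placeholder query
print(response)
```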